diff --git a/.gitignore b/.gitignore index 2d67173eb..b8f38989b 100644 --- a/.gitignore +++ b/.gitignore @@ -36,6 +36,7 @@ /asynclook /delayer /dohclient +/doqclient /lock-verify /memstats /perf diff --git a/Makefile.in b/Makefile.in index 672435e01..c262250ca 100644 --- a/Makefile.in +++ b/Makefile.in @@ -179,11 +179,11 @@ testcode/unitlruhash.c testcode/unitmain.c testcode/unitmsgparse.c \ testcode/unitneg.c testcode/unitregional.c testcode/unitslabhash.c \ testcode/unitverify.c testcode/readhex.c testcode/testpkts.c testcode/unitldns.c \ testcode/unitecs.c testcode/unitauth.c testcode/unitzonemd.c \ -testcode/unittcpreuse.c +testcode/unittcpreuse.c testcode/unitdoq.c UNITTEST_OBJ=unitanchor.lo unitdname.lo unitlruhash.lo unitmain.lo \ unitmsgparse.lo unitneg.lo unitregional.lo unitslabhash.lo unitverify.lo \ readhex.lo testpkts.lo unitldns.lo unitecs.lo unitauth.lo unitzonemd.lo \ -unittcpreuse.lo +unittcpreuse.lo unitdoq.lo UNITTEST_OBJ_LINK=$(UNITTEST_OBJ) worker_cb.lo $(COMMON_OBJ) $(SLDNS_OBJ) \ $(COMPAT_OBJ) DAEMON_SRC=daemon/acl_list.c daemon/cachedump.c daemon/daemon.c \ @@ -242,6 +242,10 @@ DOHCLIENT_SRC=testcode/dohclient.c DOHCLIENT_OBJ=dohclient.lo DOHCLIENT_OBJ_LINK=$(DOHCLIENT_OBJ) worker_cb.lo $(COMMON_OBJ) $(COMPAT_OBJ) \ $(SLDNS_OBJ) +DOQCLIENT_SRC=testcode/doqclient.c +DOQCLIENT_OBJ=doqclient.lo +DOQCLIENT_OBJ_LINK=$(DOQCLIENT_OBJ) $(COMMON_OBJ) $(COMPAT_OBJ) \ +$(SLDNS_OBJ) PERF_SRC=testcode/perf.c PERF_OBJ=perf.lo PERF_OBJ_LINK=$(PERF_OBJ) worker_cb.lo $(COMMON_OBJ) $(COMPAT_OBJ) $(SLDNS_OBJ) @@ -288,7 +292,7 @@ ALL_SRC=$(COMMON_SRC) $(UNITTEST_SRC) $(DAEMON_SRC) \ $(CONTROL_SRC) $(UBANCHOR_SRC) $(PETAL_SRC) $(DNSTAP_SOCKET_SRC)\ $(PYTHONMOD_SRC) $(PYUNBOUND_SRC) $(WIN_DAEMON_THE_SRC) \ $(SVCINST_SRC) $(SVCUNINST_SRC) $(ANCHORUPD_SRC) $(SLDNS_SRC) \ - $(DOHCLIENT_SRC) $(READZONE_SRC) + $(DOHCLIENT_SRC) $(DOQCLIENT_SRC) $(READZONE_SRC) ALL_OBJ=$(COMMON_OBJ) $(UNITTEST_OBJ) $(DAEMON_OBJ) \ $(TESTBOUND_OBJ) $(LOCKVERIFY_OBJ) $(PKTVIEW_OBJ) \ @@ 
-297,7 +301,7 @@ ALL_OBJ=$(COMMON_OBJ) $(UNITTEST_OBJ) $(DAEMON_OBJ) \ $(CONTROL_OBJ) $(UBANCHOR_OBJ) $(PETAL_OBJ) $(DNSTAP_SOCKET_OBJ)\ $(COMPAT_OBJ) $(PYUNBOUND_OBJ) \ $(SVCINST_OBJ) $(SVCUNINST_OBJ) $(ANCHORUPD_OBJ) $(SLDNS_OBJ) \ - $(DOHCLIENT_OBJ) $(READZONE_OBJ) + $(DOHCLIENT_OBJ) $(DOQCLIENT_OBJ) $(READZONE_OBJ) COMPILE=$(LIBTOOL) --tag=CC --mode=compile $(CC) $(CPPFLAGS) $(CFLAGS) @PTHREAD_CFLAGS_ONLY@ LINK=$(LIBTOOL) --tag=CC --mode=link $(CC) $(staticexe) $(RUNTIME_PATH) $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) @@ -334,7 +338,7 @@ rsrc_unbound_checkconf.o: $(srcdir)/winrc/rsrc_unbound_checkconf.rc config.h TEST_BIN=asynclook$(EXEEXT) delayer$(EXEEXT) \ lock-verify$(EXEEXT) memstats$(EXEEXT) perf$(EXEEXT) \ petal$(EXEEXT) pktview$(EXEEXT) streamtcp$(EXEEXT) \ - $(DNSTAP_SOCKET_TESTBIN) dohclient$(EXEEXT) \ + $(DNSTAP_SOCKET_TESTBIN) dohclient$(EXEEXT) doqclient$(EXEEXT) \ testbound$(EXEEXT) unittest$(EXEEXT) readzone$(EXEEXT) tests: all $(TEST_BIN) @@ -416,6 +420,9 @@ streamtcp$(EXEEXT): $(STREAMTCP_OBJ_LINK) dohclient$(EXEEXT): $(DOHCLIENT_OBJ_LINK) $(LINK) -o $@ $(DOHCLIENT_OBJ_LINK) $(SSLLIB) $(LIBS) +doqclient$(EXEEXT): $(DOQCLIENT_OBJ_LINK) + $(LINK) -o $@ $(DOQCLIENT_OBJ_LINK) $(SSLLIB) $(LIBS) + perf$(EXEEXT): $(PERF_OBJ_LINK) $(LINK) -o $@ $(PERF_OBJ_LINK) $(SSLLIB) $(LIBS) @@ -703,6 +710,8 @@ depend: # build rules ipset.lo ipset.o: $(srcdir)/ipset/ipset.c +doqclient.lo doqclient.o: $(srcdir)/testcode/doqclient.c +unitdoq.lo unitdoq.o: $(srcdir)/testcode/unitdoq.c # Dependencies dns.lo dns.o: $(srcdir)/services/cache/dns.c config.h $(srcdir)/iterator/iter_delegpt.h $(srcdir)/util/log.h \ diff --git a/config.h.in b/config.h.in index 099206025..b3a94fb34 100644 --- a/config.h.in +++ b/config.h.in @@ -129,6 +129,14 @@ and to 0 if you don't. */ #undef HAVE_DECL_NGHTTP2_SESSION_SERVER_NEW +/* Define to 1 if you have the declaration of `ngtcp2_conn_server_new', and to + 0 if you don't. 
*/ +#undef HAVE_DECL_NGTCP2_CONN_SERVER_NEW + +/* Define to 1 if you have the declaration of `ngtcp2_crypto_encrypt_cb', and + to 0 if you don't. */ +#undef HAVE_DECL_NGTCP2_CRYPTO_ENCRYPT_CB + /* Define to 1 if you have the declaration of `NID_ED25519', and to 0 if you don't. */ #undef HAVE_DECL_NID_ED25519 @@ -421,6 +429,65 @@ /* Define to 1 if you have the <nghttp2/nghttp2.h> header file. */ #undef HAVE_NGHTTP2_NGHTTP2_H +/* Define this to use ngtcp2. */ +#undef HAVE_NGTCP2 + +/* Define to 1 if you have the `ngtcp2_ccerr_default' function. */ +#undef HAVE_NGTCP2_CCERR_DEFAULT + +/* Define to 1 if you have the `ngtcp2_conn_encode_0rtt_transport_params' + function. */ +#undef HAVE_NGTCP2_CONN_ENCODE_0RTT_TRANSPORT_PARAMS + +/* Define to 1 if you have the `ngtcp2_conn_get_max_local_streams_uni' + function. */ +#undef HAVE_NGTCP2_CONN_GET_MAX_LOCAL_STREAMS_UNI + +/* Define to 1 if you have the `ngtcp2_conn_get_num_scid' function. */ +#undef HAVE_NGTCP2_CONN_GET_NUM_SCID + +/* Define to 1 if you have the `ngtcp2_conn_in_closing_period' function. */ +#undef HAVE_NGTCP2_CONN_IN_CLOSING_PERIOD + +/* Define to 1 if you have the `ngtcp2_conn_in_draining_period' function. */ +#undef HAVE_NGTCP2_CONN_IN_DRAINING_PERIOD + +/* Define if ngtcp2_conn_shutdown_stream has 4 arguments. */ +#undef HAVE_NGTCP2_CONN_SHUTDOWN_STREAM4 + +/* Define to 1 if you have the `ngtcp2_conn_tls_early_data_rejected' function. + */ +#undef HAVE_NGTCP2_CONN_TLS_EARLY_DATA_REJECTED + +/* Define to 1 if you have the `ngtcp2_crypto_encrypt_cb' function. */ +#undef HAVE_NGTCP2_CRYPTO_ENCRYPT_CB + +/* Define to 1 if you have the + `ngtcp2_crypto_quictls_configure_client_context' function. */ +#undef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_CLIENT_CONTEXT + +/* Define to 1 if you have the + `ngtcp2_crypto_quictls_configure_server_context' function. */ +#undef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT + +/* Define to 1 if you have the + `ngtcp2_crypto_quictls_from_ossl_encryption_level' function. 
*/ +#undef HAVE_NGTCP2_CRYPTO_QUICTLS_FROM_OSSL_ENCRYPTION_LEVEL + +/* Define to 1 if the system has the type `ngtcp2_encryption_level'. */ +#undef HAVE_NGTCP2_ENCRYPTION_LEVEL + +/* Define to 1 if you have the <ngtcp2/ngtcp2_crypto_openssl.h> header file. + */ +#undef HAVE_NGTCP2_NGTCP2_CRYPTO_OPENSSL_H + +/* Define to 1 if you have the <ngtcp2/ngtcp2_crypto_quictls.h> header file. + */ +#undef HAVE_NGTCP2_NGTCP2_CRYPTO_QUICTLS_H + +/* Define to 1 if you have the <ngtcp2/ngtcp2.h> header file. */ +#undef HAVE_NGTCP2_NGTCP2_H + /* Use libnss for crypto */ #undef HAVE_NSS @@ -587,6 +654,9 @@ /* Define to 1 if you have the `SSL_get1_peer_certificate' function. */ #undef HAVE_SSL_GET1_PEER_CERTIFICATE +/* Define to 1 if you have the `SSL_is_quic' function. */ +#undef HAVE_SSL_IS_QUIC + /* Define to 1 if you have the `SSL_set1_host' function. */ #undef HAVE_SSL_SET1_HOST @@ -629,6 +699,23 @@ /* Define to 1 if `ipi_spec_dst' is a member of `struct in_pktinfo'. */ #undef HAVE_STRUCT_IN_PKTINFO_IPI_SPEC_DST +/* Define to 1 if `tokenlen' is a member of `struct ngtcp2_pkt_hd'. */ +#undef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN + +/* Define to 1 if `max_tx_udp_payload_size' is a member of `struct + ngtcp2_settings'. */ +#undef HAVE_STRUCT_NGTCP2_SETTINGS_MAX_TX_UDP_PAYLOAD_SIZE + +/* Define to 1 if `tokenlen' is a member of `struct ngtcp2_settings'. */ +#undef HAVE_STRUCT_NGTCP2_SETTINGS_TOKENLEN + +/* Define to 1 if `original_dcid_present' is a member of `struct + ngtcp2_transport_params'. */ +#undef HAVE_STRUCT_NGTCP2_TRANSPORT_PARAMS_ORIGINAL_DCID_PRESENT + +/* Define to 1 if the system has the type `struct ngtcp2_version_cid'. */ +#undef HAVE_STRUCT_NGTCP2_VERSION_CID + /* Define to 1 if `sun_len' is a member of `struct sockaddr_un'. */ #undef HAVE_STRUCT_SOCKADDR_UN_SUN_LEN @@ -1521,6 +1608,8 @@ char *unbound_stat_strdup_log(const char *s, const char* file, int line, #define UNBOUND_DNS_OVER_TLS_PORT 853 /** default port for DNS over HTTPS traffic. */ #define UNBOUND_DNS_OVER_HTTPS_PORT 443 +/** default port for DNS over QUIC traffic. 
*/ +#define UNBOUND_DNS_OVER_QUIC_PORT 853 /** default port for unbound control traffic, registered port with IANA, ub-dns-control 8953/tcp unbound dns nameserver control */ #define UNBOUND_CONTROL_PORT 8953 diff --git a/configure b/configure index 5b927544f..918a06320 100755 --- a/configure +++ b/configure @@ -921,6 +921,7 @@ with_libevent with_libexpat with_libhiredis with_libnghttp2 +with_libngtcp2 enable_static_exe enable_fully_static enable_lock_checks @@ -1709,6 +1710,7 @@ Optional Packages: --with-libexpat=path specify explicit path for libexpat. --with-libhiredis=path specify explicit path for libhiredis. --with-libnghttp2=path specify explicit path for libnghttp2. + --with-libngtcp2=path specify explicit path for libngtcp2, for QUIC. --with-dnstap-socket-path=pathname set default dnstap socket path --with-protobuf-c=path Path where protobuf-c is installed, for dnstap @@ -22205,6 +22207,353 @@ printf "%s\n" "#define HAVE_DECL_NGHTTP2_SESSION_SERVER_NEW $ac_have_decl" >>con fi +# ngtcp2 + +# Check whether --with-libngtcp2 was given. +if test ${with_libngtcp2+y} +then : + withval=$with_libngtcp2; +else $as_nop + withval="no" +fi + +found_libngtcp2="no" +if test x_$withval = x_yes -o x_$withval != x_no; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for libngtcp2" >&5 +printf %s "checking for libngtcp2... " >&6; } + if test x_$withval = x_ -o x_$withval = x_yes; then + withval="/usr/local /opt/local /usr/lib /usr/pkg /usr/sfw /usr" + fi + for dir in $withval ; do + if test -f "$dir/include/ngtcp2/ngtcp2.h"; then + found_libngtcp2="yes" + if test "$dir" != "/usr"; then + CPPFLAGS="$CPPFLAGS -I$dir/include" + LDFLAGS="$LDFLAGS -L$dir/lib" + fi + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: found in $dir" >&5 +printf "%s\n" "found in $dir" >&6; } + +printf "%s\n" "#define HAVE_NGTCP2 1" >>confdefs.h + + LIBS="$LIBS -lngtcp2" + break; + fi + done + if test x_$found_libngtcp2 != x_yes; then + as_fn_error $? 
"Could not find libngtcp2, ngtcp2.h" "$LINENO" 5 + fi + ac_fn_c_check_header_compile "$LINENO" "ngtcp2/ngtcp2.h" "ac_cv_header_ngtcp2_ngtcp2_h" "$ac_includes_default +" +if test "x$ac_cv_header_ngtcp2_ngtcp2_h" = xyes +then : + printf "%s\n" "#define HAVE_NGTCP2_NGTCP2_H 1" >>confdefs.h + +fi +ac_fn_c_check_header_compile "$LINENO" "ngtcp2/ngtcp2_crypto_openssl.h" "ac_cv_header_ngtcp2_ngtcp2_crypto_openssl_h" "$ac_includes_default +" +if test "x$ac_cv_header_ngtcp2_ngtcp2_crypto_openssl_h" = xyes +then : + printf "%s\n" "#define HAVE_NGTCP2_NGTCP2_CRYPTO_OPENSSL_H 1" >>confdefs.h + +fi +ac_fn_c_check_header_compile "$LINENO" "ngtcp2/ngtcp2_crypto_quictls.h" "ac_cv_header_ngtcp2_ngtcp2_crypto_quictls_h" "$ac_includes_default +" +if test "x$ac_cv_header_ngtcp2_ngtcp2_crypto_quictls_h" = xyes +then : + printf "%s\n" "#define HAVE_NGTCP2_NGTCP2_CRYPTO_QUICTLS_H 1" >>confdefs.h + +fi + + ac_fn_check_decl "$LINENO" "ngtcp2_conn_server_new" "ac_cv_have_decl_ngtcp2_conn_server_new" "$ac_includes_default + #include + +" "$ac_c_undeclared_builtin_options" "CFLAGS" +if test "x$ac_cv_have_decl_ngtcp2_conn_server_new" = xyes +then : + ac_have_decl=1 +else $as_nop + ac_have_decl=0 +fi +printf "%s\n" "#define HAVE_DECL_NGTCP2_CONN_SERVER_NEW $ac_have_decl" >>confdefs.h + + ac_fn_check_decl "$LINENO" "ngtcp2_crypto_encrypt_cb" "ac_cv_have_decl_ngtcp2_crypto_encrypt_cb" "$ac_includes_default + #include + +" "$ac_c_undeclared_builtin_options" "CFLAGS" +if test "x$ac_cv_have_decl_ngtcp2_crypto_encrypt_cb" = xyes +then : + ac_have_decl=1 +else $as_nop + ac_have_decl=0 +fi +printf "%s\n" "#define HAVE_DECL_NGTCP2_CRYPTO_ENCRYPT_CB $ac_have_decl" >>confdefs.h + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for ngtcp2_crypto_encrypt_cb in -lngtcp2_crypto_openssl" >&5 +printf %s "checking for ngtcp2_crypto_encrypt_cb in -lngtcp2_crypto_openssl... 
" >&6; } +if test ${ac_cv_lib_ngtcp2_crypto_openssl_ngtcp2_crypto_encrypt_cb+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lngtcp2_crypto_openssl $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +char ngtcp2_crypto_encrypt_cb (); +int +main (void) +{ +return ngtcp2_crypto_encrypt_cb (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_ngtcp2_crypto_openssl_ngtcp2_crypto_encrypt_cb=yes +else $as_nop + ac_cv_lib_ngtcp2_crypto_openssl_ngtcp2_crypto_encrypt_cb=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_ngtcp2_crypto_openssl_ngtcp2_crypto_encrypt_cb" >&5 +printf "%s\n" "$ac_cv_lib_ngtcp2_crypto_openssl_ngtcp2_crypto_encrypt_cb" >&6; } +if test "x$ac_cv_lib_ngtcp2_crypto_openssl_ngtcp2_crypto_encrypt_cb" = xyes +then : + LIBS="$LIBS -lngtcp2_crypto_openssl" +fi + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for ngtcp2_crypto_encrypt_cb in -lngtcp2_crypto_quictls" >&5 +printf %s "checking for ngtcp2_crypto_encrypt_cb in -lngtcp2_crypto_quictls... " >&6; } +if test ${ac_cv_lib_ngtcp2_crypto_quictls_ngtcp2_crypto_encrypt_cb+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lngtcp2_crypto_quictls $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +char ngtcp2_crypto_encrypt_cb (); +int +main (void) +{ +return ngtcp2_crypto_encrypt_cb (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_ngtcp2_crypto_quictls_ngtcp2_crypto_encrypt_cb=yes +else $as_nop + ac_cv_lib_ngtcp2_crypto_quictls_ngtcp2_crypto_encrypt_cb=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_ngtcp2_crypto_quictls_ngtcp2_crypto_encrypt_cb" >&5 +printf "%s\n" "$ac_cv_lib_ngtcp2_crypto_quictls_ngtcp2_crypto_encrypt_cb" >&6; } +if test "x$ac_cv_lib_ngtcp2_crypto_quictls_ngtcp2_crypto_encrypt_cb" = xyes +then : + LIBS="$LIBS -lngtcp2_crypto_quictls" +fi + + ac_fn_c_check_func "$LINENO" "ngtcp2_crypto_encrypt_cb" "ac_cv_func_ngtcp2_crypto_encrypt_cb" +if test "x$ac_cv_func_ngtcp2_crypto_encrypt_cb" = xyes +then : + printf "%s\n" "#define HAVE_NGTCP2_CRYPTO_ENCRYPT_CB 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "ngtcp2_ccerr_default" "ac_cv_func_ngtcp2_ccerr_default" +if test "x$ac_cv_func_ngtcp2_ccerr_default" = xyes +then : + printf "%s\n" "#define HAVE_NGTCP2_CCERR_DEFAULT 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "ngtcp2_conn_in_closing_period" "ac_cv_func_ngtcp2_conn_in_closing_period" +if test "x$ac_cv_func_ngtcp2_conn_in_closing_period" = xyes +then : + printf "%s\n" "#define HAVE_NGTCP2_CONN_IN_CLOSING_PERIOD 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "ngtcp2_conn_in_draining_period" "ac_cv_func_ngtcp2_conn_in_draining_period" +if test "x$ac_cv_func_ngtcp2_conn_in_draining_period" = xyes +then : + printf "%s\n" "#define HAVE_NGTCP2_CONN_IN_DRAINING_PERIOD 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "ngtcp2_conn_get_max_local_streams_uni" "ac_cv_func_ngtcp2_conn_get_max_local_streams_uni" +if test "x$ac_cv_func_ngtcp2_conn_get_max_local_streams_uni" = xyes +then : + printf "%s\n" "#define 
HAVE_NGTCP2_CONN_GET_MAX_LOCAL_STREAMS_UNI 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "ngtcp2_crypto_quictls_from_ossl_encryption_level" "ac_cv_func_ngtcp2_crypto_quictls_from_ossl_encryption_level" +if test "x$ac_cv_func_ngtcp2_crypto_quictls_from_ossl_encryption_level" = xyes +then : + printf "%s\n" "#define HAVE_NGTCP2_CRYPTO_QUICTLS_FROM_OSSL_ENCRYPTION_LEVEL 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "ngtcp2_crypto_quictls_configure_server_context" "ac_cv_func_ngtcp2_crypto_quictls_configure_server_context" +if test "x$ac_cv_func_ngtcp2_crypto_quictls_configure_server_context" = xyes +then : + printf "%s\n" "#define HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "ngtcp2_crypto_quictls_configure_client_context" "ac_cv_func_ngtcp2_crypto_quictls_configure_client_context" +if test "x$ac_cv_func_ngtcp2_crypto_quictls_configure_client_context" = xyes +then : + printf "%s\n" "#define HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_CLIENT_CONTEXT 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "ngtcp2_conn_get_num_scid" "ac_cv_func_ngtcp2_conn_get_num_scid" +if test "x$ac_cv_func_ngtcp2_conn_get_num_scid" = xyes +then : + printf "%s\n" "#define HAVE_NGTCP2_CONN_GET_NUM_SCID 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "ngtcp2_conn_tls_early_data_rejected" "ac_cv_func_ngtcp2_conn_tls_early_data_rejected" +if test "x$ac_cv_func_ngtcp2_conn_tls_early_data_rejected" = xyes +then : + printf "%s\n" "#define HAVE_NGTCP2_CONN_TLS_EARLY_DATA_REJECTED 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "ngtcp2_conn_encode_0rtt_transport_params" "ac_cv_func_ngtcp2_conn_encode_0rtt_transport_params" +if test "x$ac_cv_func_ngtcp2_conn_encode_0rtt_transport_params" = xyes +then : + printf "%s\n" "#define HAVE_NGTCP2_CONN_ENCODE_0RTT_TRANSPORT_PARAMS 1" >>confdefs.h + +fi + + + for ac_func in SSL_is_quic +do : + ac_fn_c_check_func "$LINENO" "SSL_is_quic" "ac_cv_func_SSL_is_quic" +if test 
"x$ac_cv_func_SSL_is_quic" = xyes +then : + printf "%s\n" "#define HAVE_SSL_IS_QUIC 1" >>confdefs.h + +else $as_nop + as_fn_error $? "No QUIC support detected in OpenSSL. Need OpenSSL version with QUIC support to enable DNS over QUIC with libngtcp2." "$LINENO" 5 +fi + +done + ac_fn_c_check_type "$LINENO" "struct ngtcp2_version_cid" "ac_cv_type_struct_ngtcp2_version_cid" "$ac_includes_default + #include + +" +if test "x$ac_cv_type_struct_ngtcp2_version_cid" = xyes +then : + +printf "%s\n" "#define HAVE_STRUCT_NGTCP2_VERSION_CID 1" >>confdefs.h + + +fi +ac_fn_c_check_type "$LINENO" "ngtcp2_encryption_level" "ac_cv_type_ngtcp2_encryption_level" "$ac_includes_default + #include + +" +if test "x$ac_cv_type_ngtcp2_encryption_level" = xyes +then : + +printf "%s\n" "#define HAVE_NGTCP2_ENCRYPTION_LEVEL 1" >>confdefs.h + + +fi + + ac_fn_c_check_member "$LINENO" "struct ngtcp2_pkt_hd" "tokenlen" "ac_cv_member_struct_ngtcp2_pkt_hd_tokenlen" "$ac_includes_default + #include + +" +if test "x$ac_cv_member_struct_ngtcp2_pkt_hd_tokenlen" = xyes +then : + +printf "%s\n" "#define HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN 1" >>confdefs.h + + +fi +ac_fn_c_check_member "$LINENO" "struct ngtcp2_settings" "tokenlen" "ac_cv_member_struct_ngtcp2_settings_tokenlen" "$ac_includes_default + #include + +" +if test "x$ac_cv_member_struct_ngtcp2_settings_tokenlen" = xyes +then : + +printf "%s\n" "#define HAVE_STRUCT_NGTCP2_SETTINGS_TOKENLEN 1" >>confdefs.h + + +fi +ac_fn_c_check_member "$LINENO" "struct ngtcp2_settings" "max_tx_udp_payload_size" "ac_cv_member_struct_ngtcp2_settings_max_tx_udp_payload_size" "$ac_includes_default + #include + +" +if test "x$ac_cv_member_struct_ngtcp2_settings_max_tx_udp_payload_size" = xyes +then : + +printf "%s\n" "#define HAVE_STRUCT_NGTCP2_SETTINGS_MAX_TX_UDP_PAYLOAD_SIZE 1" >>confdefs.h + + +fi +ac_fn_c_check_member "$LINENO" "struct ngtcp2_transport_params" "original_dcid_present" "ac_cv_member_struct_ngtcp2_transport_params_original_dcid_present" 
"$ac_includes_default + #include + +" +if test "x$ac_cv_member_struct_ngtcp2_transport_params_original_dcid_present" = xyes +then : + +printf "%s\n" "#define HAVE_STRUCT_NGTCP2_TRANSPORT_PARAMS_ORIGINAL_DCID_PRESENT 1" >>confdefs.h + + +fi + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether ngtcp2_conn_shutdown_stream has 4 arguments" >&5 +printf %s "checking whether ngtcp2_conn_shutdown_stream has 4 arguments... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$ac_includes_default +#include + +int +main (void) +{ + + (void)ngtcp2_conn_shutdown_stream(NULL, 0, 0, 0); + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +printf "%s\n" "#define HAVE_NGTCP2_CONN_SHUTDOWN_STREAM4 1" >>confdefs.h + + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + +fi + # set static linking for uninstalled libraries if requested staticexe="" @@ -23788,10 +24137,12 @@ if test x_$enable_lock_checks = x_yes; then UBSYMS="-export-symbols clubsyms.def" cp ${srcdir}/libunbound/ubsyms.def clubsyms.def echo lock_protect >> clubsyms.def + echo lock_protect_place >> clubsyms.def echo lock_unprotect >> clubsyms.def echo lock_get_mem >> clubsyms.def echo checklock_start >> clubsyms.def echo checklock_stop >> clubsyms.def + echo checklock_set_output_name >> clubsyms.def echo checklock_lock >> clubsyms.def echo checklock_unlock >> clubsyms.def echo checklock_init >> clubsyms.def diff --git a/configure.ac b/configure.ac index eb093c840..a2d9d724b 100644 --- a/configure.ac +++ b/configure.ac @@ -1579,6 +1579,64 @@ if test x_$withval = x_yes -o x_$withval != x_no; then ]) fi +# ngtcp2 +AC_ARG_WITH(libngtcp2, AS_HELP_STRING([--with-libngtcp2=path],[specify explicit path for libngtcp2, for QUIC.]), + [ ],[ 
withval="no" ]) +found_libngtcp2="no" +if test x_$withval = x_yes -o x_$withval != x_no; then + AC_MSG_CHECKING(for libngtcp2) + if test x_$withval = x_ -o x_$withval = x_yes; then + withval="/usr/local /opt/local /usr/lib /usr/pkg /usr/sfw /usr" + fi + for dir in $withval ; do + if test -f "$dir/include/ngtcp2/ngtcp2.h"; then + found_libngtcp2="yes" + dnl assume /usr is in default path. + if test "$dir" != "/usr"; then + CPPFLAGS="$CPPFLAGS -I$dir/include" + LDFLAGS="$LDFLAGS -L$dir/lib" + fi + AC_MSG_RESULT(found in $dir) + AC_DEFINE([HAVE_NGTCP2], [1], [Define this to use ngtcp2.]) + LIBS="$LIBS -lngtcp2" + break; + fi + done + if test x_$found_libngtcp2 != x_yes; then + AC_MSG_ERROR([Could not find libngtcp2, ngtcp2.h]) + fi + AC_CHECK_HEADERS([ngtcp2/ngtcp2.h ngtcp2/ngtcp2_crypto_openssl.h ngtcp2/ngtcp2_crypto_quictls.h],,, [AC_INCLUDES_DEFAULT]) + AC_CHECK_DECLS([ngtcp2_conn_server_new], [], [], [AC_INCLUDES_DEFAULT + #include + ]) + AC_CHECK_DECLS([ngtcp2_crypto_encrypt_cb], [], [], [AC_INCLUDES_DEFAULT + #include + ]) + AC_CHECK_LIB([ngtcp2_crypto_openssl], [ngtcp2_crypto_encrypt_cb], [ LIBS="$LIBS -lngtcp2_crypto_openssl" ]) + AC_CHECK_LIB([ngtcp2_crypto_quictls], [ngtcp2_crypto_encrypt_cb], [ LIBS="$LIBS -lngtcp2_crypto_quictls" ]) + AC_CHECK_FUNCS([ngtcp2_crypto_encrypt_cb ngtcp2_ccerr_default ngtcp2_conn_in_closing_period ngtcp2_conn_in_draining_period ngtcp2_conn_get_max_local_streams_uni ngtcp2_crypto_quictls_from_ossl_encryption_level ngtcp2_crypto_quictls_configure_server_context ngtcp2_crypto_quictls_configure_client_context ngtcp2_conn_get_num_scid ngtcp2_conn_tls_early_data_rejected ngtcp2_conn_encode_0rtt_transport_params]) + AC_CHECK_FUNCS([SSL_is_quic], [], [AC_MSG_ERROR([No QUIC support detected in OpenSSL. 
Need OpenSSL version with QUIC support to enable DNS over QUIC with libngtcp2.])]) + AC_CHECK_TYPES([struct ngtcp2_version_cid, ngtcp2_encryption_level],,,[AC_INCLUDES_DEFAULT + #include + ]) + AC_CHECK_MEMBERS([struct ngtcp2_pkt_hd.tokenlen, struct ngtcp2_settings.tokenlen, struct ngtcp2_settings.max_tx_udp_payload_size, struct ngtcp2_transport_params.original_dcid_present],,,[AC_INCLUDES_DEFAULT + #include + ]) + + AC_MSG_CHECKING([whether ngtcp2_conn_shutdown_stream has 4 arguments]) + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([AC_INCLUDES_DEFAULT +#include + ],[ + (void)ngtcp2_conn_shutdown_stream(NULL, 0, 0, 0); + ])],[ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_NGTCP2_CONN_SHUTDOWN_STREAM4, 1, [Define if ngtcp2_conn_shutdown_stream has 4 arguments.]) + ],[ + AC_MSG_RESULT(no) + ]) + +fi + # set static linking for uninstalled libraries if requested AC_SUBST(staticexe) staticexe="" @@ -1894,10 +1952,12 @@ if test x_$enable_lock_checks = x_yes; then UBSYMS="-export-symbols clubsyms.def" cp ${srcdir}/libunbound/ubsyms.def clubsyms.def echo lock_protect >> clubsyms.def + echo lock_protect_place >> clubsyms.def echo lock_unprotect >> clubsyms.def echo lock_get_mem >> clubsyms.def echo checklock_start >> clubsyms.def echo checklock_stop >> clubsyms.def + echo checklock_set_output_name >> clubsyms.def echo checklock_lock >> clubsyms.def echo checklock_unlock >> clubsyms.def echo checklock_init >> clubsyms.def @@ -2356,6 +2416,8 @@ char *unbound_stat_strdup_log(const char *s, const char* file, int line, #define UNBOUND_DNS_OVER_TLS_PORT 853 /** default port for DNS over HTTPS traffic. */ #define UNBOUND_DNS_OVER_HTTPS_PORT 443 +/** default port for DNS over QUIC traffic. 
*/ +#define UNBOUND_DNS_OVER_QUIC_PORT 853 /** default port for unbound control traffic, registered port with IANA, ub-dns-control 8953/tcp unbound dns nameserver control */ #define UNBOUND_CONTROL_PORT 8953 diff --git a/daemon/daemon.c b/daemon/daemon.c index 72b4a43be..1c8272b14 100644 --- a/daemon/daemon.c +++ b/daemon/daemon.c @@ -557,6 +557,12 @@ daemon_create_workers(struct daemon* daemon) fatal_exit("out of memory during daemon init"); numport = daemon_get_shufport(daemon, shufport); verbose(VERB_ALGO, "total of %d outgoing ports available", numport); + +#ifdef HAVE_NGTCP2 + daemon->doq_table = doq_table_create(daemon->cfg, daemon->rand); + if(!daemon->doq_table) + fatal_exit("could not create doq_table: out of memory"); +#endif daemon->num = (daemon->cfg->num_threads?daemon->cfg->num_threads:1); if(daemon->reuseport && (int)daemon->num < (int)daemon->num_ports) { @@ -906,6 +912,10 @@ daemon_cleanup(struct daemon* daemon) #ifdef USE_DNSCRYPT dnsc_delete(daemon->dnscenv); daemon->dnscenv = NULL; +#endif +#ifdef HAVE_NGTCP2 + doq_table_delete(daemon->doq_table); + daemon->doq_table = NULL; #endif daemon->cfg = NULL; } diff --git a/daemon/daemon.h b/daemon/daemon.h index 5c3a114cc..fc1bde713 100644 --- a/daemon/daemon.h +++ b/daemon/daemon.h @@ -58,6 +58,7 @@ struct ub_randstate; struct daemon_remote; struct respip_set; struct shm_main_info; +struct doq_table; struct cookie_secrets; #include "dnstap/dnstap_config.h" @@ -147,6 +148,8 @@ struct daemon { /** the dnscrypt environment */ struct dnsc_env* dnscenv; #endif + /** the doq connection table */ + struct doq_table* doq_table; /** reuse existing cache on reload if other conditions allow it. 
*/ int reuse_cache; /** the EDNS cookie secrets from the cookie-secret-file */ diff --git a/daemon/remote.c b/daemon/remote.c index 5af03328e..8877cd194 100644 --- a/daemon/remote.c +++ b/daemon/remote.c @@ -302,7 +302,7 @@ add_open(const char* ip, int nr, struct listen_port** list, int noproto_is_err, /* open fd */ fd = create_tcp_accept_sock(res, 1, &noproto, 0, cfg->ip_transparent, 0, 0, cfg->ip_freebind, - cfg->use_systemd, cfg->ip_dscp); + cfg->use_systemd, cfg->ip_dscp, "unbound-control"); freeaddrinfo(res); } @@ -866,6 +866,10 @@ print_mem(RES* ssl, struct worker* worker, struct daemon* daemon, if(!print_longnum(ssl, "mem.http.response_buffer"SQ, (size_t)s->svr.mem_http2_response_buffer)) return 0; +#ifdef HAVE_NGTCP2 + if(!print_longnum(ssl, "mem.quic"SQ, (size_t)s->svr.mem_quic)) + return 0; +#endif /* HAVE_NGTCP2 */ return 1; } @@ -996,6 +1000,10 @@ print_ext(RES* ssl, struct ub_stats_info* s, int inhibit_zero) (unsigned long)s->svr.qipv6)) return 0; if(!ssl_printf(ssl, "num.query.https"SQ"%lu\n", (unsigned long)s->svr.qhttps)) return 0; +#ifdef HAVE_NGTCP2 + if(!ssl_printf(ssl, "num.query.quic"SQ"%lu\n", + (unsigned long)s->svr.qquic)) return 0; +#endif /* HAVE_NGTCP2 */ /* flags */ if(!ssl_printf(ssl, "num.query.flags.QR"SQ"%lu\n", (unsigned long)s->svr.qbit_QR)) return 0; diff --git a/daemon/stats.c b/daemon/stats.c index 827110698..0e17300a1 100644 --- a/daemon/stats.c +++ b/daemon/stats.c @@ -346,6 +346,12 @@ server_stats_compile(struct worker* worker, struct ub_stats_info* s, int reset) (long long)http2_get_query_buffer_size(); s->svr.mem_http2_response_buffer = (long long)http2_get_response_buffer_size(); +#ifdef HAVE_NGTCP2 + s->svr.mem_quic = (long long)doq_table_quic_size_get( + worker->daemon->doq_table); +#else + s->svr.mem_quic = 0; +#endif /* HAVE_NGTCP2 */ /* Set neg cache usage numbers */ set_neg_cache_stats(worker, &s->svr, reset); @@ -474,6 +480,7 @@ void server_stats_add(struct ub_stats_info* total, struct ub_stats_info* a) 
total->svr.qtls += a->svr.qtls; total->svr.qtls_resume += a->svr.qtls_resume; total->svr.qhttps += a->svr.qhttps; + total->svr.qquic += a->svr.qquic; total->svr.qipv6 += a->svr.qipv6; total->svr.qbit_QR += a->svr.qbit_QR; total->svr.qbit_AA += a->svr.qbit_AA; @@ -533,7 +540,8 @@ void server_stats_insquery(struct ub_server_stats* stats, struct comm_point* c, else stats->qclass_big++; stats->qopcode[ LDNS_OPCODE_WIRE(sldns_buffer_begin(c->buffer)) ]++; if(c->type != comm_udp) { - stats->qtcp++; + if(c->type != comm_doq) + stats->qtcp++; if(c->ssl != NULL) { stats->qtls++; #ifdef HAVE_SSL @@ -542,6 +550,10 @@ void server_stats_insquery(struct ub_server_stats* stats, struct comm_point* c, #endif if(c->type == comm_http) stats->qhttps++; +#ifdef HAVE_NGTCP2 + else if(c->type == comm_doq) + stats->qquic++; +#endif } } if(repinfo && addr_is_ip6(&repinfo->remote_addr, repinfo->remote_addrlen)) diff --git a/daemon/worker.c b/daemon/worker.c index fe105eb7b..713de3163 100644 --- a/daemon/worker.c +++ b/daemon/worker.c @@ -2174,7 +2174,9 @@ worker_init(struct worker* worker, struct config_file *cfg, cfg->harden_large_queries, cfg->http_max_streams, cfg->http_endpoint, cfg->http_notls_downstream, worker->daemon->tcl, worker->daemon->listen_sslctx, - dtenv, worker_handle_request, worker); + dtenv, worker->daemon->doq_table, worker->env.rnd, + cfg->ssl_service_key, cfg->ssl_service_pem, cfg, + worker_handle_request, worker); if(!worker->front) { log_err("could not create listening sockets"); worker_delete(worker); @@ -2508,3 +2510,19 @@ void dtio_mainfdcallback(int ATTR_UNUSED(fd), short ATTR_UNUSED(ev), log_assert(0); } #endif + +#ifdef HAVE_NGTCP2 +void doq_client_event_cb(int ATTR_UNUSED(fd), short ATTR_UNUSED(ev), + void* ATTR_UNUSED(arg)) +{ + log_assert(0); +} +#endif + +#ifdef HAVE_NGTCP2 +void doq_client_timer_cb(int ATTR_UNUSED(fd), short ATTR_UNUSED(ev), + void* ATTR_UNUSED(arg)) +{ + log_assert(0); +} +#endif diff --git a/dnstap/unbound-dnstap-socket.c 
b/dnstap/unbound-dnstap-socket.c index b6b993d54..7f8be4965 100644 --- a/dnstap/unbound-dnstap-socket.c +++ b/dnstap/unbound-dnstap-socket.c @@ -1785,3 +1785,19 @@ void remote_get_opt_ssl(char* ATTR_UNUSED(str), void* ATTR_UNUSED(arg)) { log_assert(0); } + +#ifdef HAVE_NGTCP2 +void doq_client_event_cb(int ATTR_UNUSED(fd), short ATTR_UNUSED(ev), + void* ATTR_UNUSED(arg)) +{ + log_assert(0); +} +#endif + +#ifdef HAVE_NGTCP2 +void doq_client_timer_cb(int ATTR_UNUSED(fd), short ATTR_UNUSED(ev), + void* ATTR_UNUSED(arg)) +{ + log_assert(0); +} +#endif diff --git a/doc/example.conf.in b/doc/example.conf.in index 2d16ee75f..06e2b4ba8 100644 --- a/doc/example.conf.in +++ b/doc/example.conf.in @@ -920,6 +920,7 @@ server: # tls-service-pem: "path/to/publiccertfile.pem" # tls-port: 853 # https-port: 443 + # quic-port: 853 # cipher setting for TLSv1.2 # tls-ciphers: "DHE-RSA-AES256-GCM-SHA384:DHE-RSA-AES128-GCM-SHA256:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-SHA256:DHE-RSA-AES128-SHA256:ECDHE-RSA-AES256-SHA384:ECDHE-RSA-AES128-SHA256" @@ -984,6 +985,9 @@ server: # Disable TLS for DNS-over-HTTP downstream service. # http-notls-downstream: no + # Maximum number of bytes used for QUIC buffers. + # quic-size: 8m + # The interfaces that use these listed port numbers will support and # expect PROXYv2. For UDP and TCP/TLS interfaces. # proxy-protocol-port: portno for each of the port numbers. diff --git a/doc/unbound-control.8.in b/doc/unbound-control.8.in index 17073f938..6470d544c 100644 --- a/doc/unbound-control.8.in +++ b/doc/unbound-control.8.in @@ -606,6 +606,10 @@ queries waiting for request stream completion. Memory in bytes used by the HTTP/2 response buffers. Containing DNS responses waiting to be written back to the clients. .TP +.I mem.quic +Memory in bytes used by QUIC. Containing connection information, stream +information, queries read and responses written back to the clients. +.TP .I histogram...to.. 
Shows a histogram, summed over all threads. Every element counts the recursive queries whose reply time fit between the lower and upper bound. @@ -654,6 +658,10 @@ Number of queries that were made using HTTPS towards the Unbound server. These are also counted in num.query.tcp and num.query.tls, because HTTPS uses TLS and TCP. .TP +.I num.query.quic +Number of queries that were made using QUIC towards the Unbound server. +These are also counted in num.query.tls, because TLS is used for these queries. +.TP .I num.query.ipv6 Number of queries that were made using IPv6 towards the Unbound server. .TP diff --git a/doc/unbound.conf.5.in b/doc/unbound.conf.5.in index 2a5f6792a..da494087c 100644 --- a/doc/unbound.conf.5.in +++ b/doc/unbound.conf.5.in @@ -719,6 +719,18 @@ PROXYv2 is supported for UDP and TCP/TLS listening interfaces. There is no support for PROXYv2 on a DoH or DNSCrypt listening interface. Can list multiple, each on a new statement. .TP +.B quic\-port: \fI<number> +The port number on which to provide DNS-over-QUIC service, default 853, only +interfaces configured with that port number as @number get the QUIC service. +The interface uses QUIC for the UDP traffic on that port number. +.TP +.B quic\-size: \fI<size in bytes> +Maximum number of bytes for all QUIC buffers and data combined. Default is 8 +megabytes. A plain number is in bytes, append 'k', 'm' or 'g' for kilobytes, +megabytes or gigabytes (1024*1024 bytes in a megabyte). New connections receive +connection refused when the limit is exceeded. New streams are reset when the +limit is exceeded. +.TP .B use\-systemd: \fI<yes or no> Enable or disable systemd socket activation. Default is no. 
diff --git a/libunbound/libworker.c b/libunbound/libworker.c index 94b644a49..da7d4c224 100644 --- a/libunbound/libworker.c +++ b/libunbound/libworker.c @@ -1058,3 +1058,19 @@ void dtio_mainfdcallback(int ATTR_UNUSED(fd), short ATTR_UNUSED(ev), log_assert(0); } #endif + +#ifdef HAVE_NGTCP2 +void doq_client_event_cb(int ATTR_UNUSED(fd), short ATTR_UNUSED(ev), + void* ATTR_UNUSED(arg)) +{ + log_assert(0); +} +#endif + +#ifdef HAVE_NGTCP2 +void doq_client_timer_cb(int ATTR_UNUSED(fd), short ATTR_UNUSED(ev), + void* ATTR_UNUSED(arg)) +{ + log_assert(0); +} +#endif diff --git a/libunbound/unbound.h b/libunbound/unbound.h index bb8e8acf0..ef2c5c067 100644 --- a/libunbound/unbound.h +++ b/libunbound/unbound.h @@ -845,6 +845,10 @@ struct ub_server_stats { long long qtls_resume; /** RPZ action stats */ long long rpz_action[UB_STATS_RPZ_ACTION_NUM]; + /** number of bytes in QUIC buffers */ + long long mem_quic; + /** number of queries over (DNS over) QUIC */ + long long qquic; }; /** diff --git a/services/listen_dnsport.c b/services/listen_dnsport.c index 6c0691f2a..078a344d3 100644 --- a/services/listen_dnsport.c +++ b/services/listen_dnsport.c @@ -56,9 +56,11 @@ #include "util/net_help.h" #include "sldns/sbuffer.h" #include "sldns/parseutil.h" +#include "sldns/wire2str.h" #include "services/mesh.h" #include "util/fptr_wlist.h" #include "util/locks.h" +#include "util/timeval_func.h" #ifdef HAVE_NETDB_H #include @@ -79,9 +81,30 @@ #ifdef HAVE_NET_IF_H #include #endif + +#ifdef HAVE_TIME_H +#include +#endif +#include + +#ifdef HAVE_NGTCP2 +#include +#include +#ifdef HAVE_NGTCP2_NGTCP2_CRYPTO_QUICTLS_H +#include +#else +#include +#endif +#endif + +#ifdef HAVE_OPENSSL_SSL_H +#include +#endif + #ifdef HAVE_LINUX_NET_TSTAMP_H #include #endif + /** number of queued TCP connections for listen() */ #define TCP_BACKLOG 256 @@ -109,9 +132,11 @@ static int http2_response_buffer_lock_inited = 0; /** * Debug print of the getaddrinfo returned address. * @param addr: the address returned. 
+ * @param additional: additional text that describes the type of socket, + * or NULL for no text. */ static void -verbose_print_addr(struct addrinfo *addr) +verbose_print_addr(struct addrinfo *addr, const char* additional) { if(verbosity >= VERB_ALGO) { char buf[100]; @@ -126,13 +151,14 @@ verbose_print_addr(struct addrinfo *addr) (void)strlcpy(buf, "(null)", sizeof(buf)); } buf[sizeof(buf)-1] = 0; - verbose(VERB_ALGO, "creating %s%s socket %s %d", + verbose(VERB_ALGO, "creating %s%s socket %s %d%s%s", addr->ai_socktype==SOCK_DGRAM?"udp": addr->ai_socktype==SOCK_STREAM?"tcp":"otherproto", addr->ai_family==AF_INET?"4": addr->ai_family==AF_INET6?"6": "_otherfam", buf, - ntohs(((struct sockaddr_in*)addr->ai_addr)->sin_port)); + ntohs(((struct sockaddr_in*)addr->ai_addr)->sin_port), + (additional?" ":""), (additional?additional:"")); } } @@ -673,7 +699,7 @@ create_udp_sock(int family, int socktype, struct sockaddr* addr, int create_tcp_accept_sock(struct addrinfo *addr, int v6only, int* noproto, int* reuseport, int transparent, int mss, int nodelay, int freebind, - int use_systemd, int dscp) + int use_systemd, int dscp, const char* additional) { int s = -1; char* err; @@ -692,7 +718,7 @@ create_tcp_accept_sock(struct addrinfo *addr, int v6only, int* noproto, #if !defined(IP_FREEBIND) (void)freebind; #endif - verbose_print_addr(addr); + verbose_print_addr(addr, additional); *noproto = 0; #ifdef HAVE_SYSTEMD if (!use_systemd || @@ -1008,7 +1034,8 @@ static int make_sock(int stype, const char* ifname, const char* port, struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd, int* reuseport, int transparent, int tcp_mss, int nodelay, int freebind, - int use_systemd, int dscp, struct unbound_socket* ub_sock) + int use_systemd, int dscp, struct unbound_socket* ub_sock, + const char* additional) { struct addrinfo *res = NULL; int r, s, inuse, noproto; @@ -1032,7 +1059,7 @@ make_sock(int stype, const char* ifname, const char* port, return -1; } if(stype == 
SOCK_DGRAM) { - verbose_print_addr(res); + verbose_print_addr(res, additional); s = create_udp_sock(res->ai_family, res->ai_socktype, (struct sockaddr*)res->ai_addr, res->ai_addrlen, v6only, &inuse, &noproto, (int)rcv, (int)snd, 1, @@ -1045,7 +1072,7 @@ make_sock(int stype, const char* ifname, const char* port, } else { s = create_tcp_accept_sock(res, v6only, &noproto, reuseport, transparent, tcp_mss, nodelay, freebind, use_systemd, - dscp); + dscp, additional); if(s == -1 && noproto && hints->ai_family == AF_INET6){ *noip6 = 1; } @@ -1079,7 +1106,8 @@ static int make_sock_port(int stype, const char* ifname, const char* port, struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd, int* reuseport, int transparent, int tcp_mss, int nodelay, int freebind, - int use_systemd, int dscp, struct unbound_socket* ub_sock) + int use_systemd, int dscp, struct unbound_socket* ub_sock, + const char* additional) { char* s = strchr(ifname, '@'); if(s) { @@ -1102,11 +1130,11 @@ make_sock_port(int stype, const char* ifname, const char* port, p[strlen(s+1)]=0; return make_sock(stype, newif, p, hints, v6only, noip6, rcv, snd, reuseport, transparent, tcp_mss, nodelay, freebind, - use_systemd, dscp, ub_sock); + use_systemd, dscp, ub_sock, additional); } return make_sock(stype, ifname, port, hints, v6only, noip6, rcv, snd, reuseport, transparent, tcp_mss, nodelay, freebind, use_systemd, - dscp, ub_sock); + dscp, ub_sock, additional); } /** @@ -1254,6 +1282,8 @@ if_is_ssl(const char* ifname, const char* port, int ssl_port, * @param use_systemd: if true, fetch sockets from systemd. * @param dnscrypt_port: dnscrypt service port number * @param dscp: DSCP to use. + * @param quic_port: dns over quic port number. + * @param http_notls_downstream: if no tls is used for https downstream. * @param sock_queue_timeout: the sock_queue_timeout from config. Seconds to * wait to discard if UDP packets have waited for long in the socket * buffer. 
@@ -1267,7 +1297,7 @@ ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp, struct config_strlist* proxy_protocol_port, int* reuseport, int transparent, int tcp_mss, int freebind, int http2_nodelay, int use_systemd, int dnscrypt_port, int dscp, - int sock_queue_timeout) + int quic_port, int http_notls_downstream, int sock_queue_timeout) { int s, noip6=0; int is_https = if_is_https(ifname, port, https_port); @@ -1275,6 +1305,8 @@ ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp, int is_pp2 = if_is_pp2(ifname, port, proxy_protocol_port); int nodelay = is_https && http2_nodelay; struct unbound_socket* ub_sock; + int is_doq = if_is_quic(ifname, port, quic_port); + const char* add = NULL; if(!do_udp && !do_tcp) return 0; @@ -1286,6 +1318,9 @@ ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp, } else if(is_https) { fatal_exit("PROXYv2 and DoH combination not " "supported!"); + } else if(is_doq) { + fatal_exit("PROXYv2 and DoQ combination not " + "supported!"); } } @@ -1295,7 +1330,8 @@ ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp, return 0; if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1, &noip6, rcv, snd, reuseport, transparent, - tcp_mss, nodelay, freebind, use_systemd, dscp, ub_sock)) == -1) { + tcp_mss, nodelay, freebind, use_systemd, dscp, ub_sock, + (is_dnscrypt?"udpancil_dnscrypt":"udpancil"))) == -1) { free(ub_sock->addr); free(ub_sock); if(noip6) { @@ -1323,13 +1359,36 @@ ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp, return 0; } } else if(do_udp) { + enum listen_type udp_port_type; ub_sock = calloc(1, sizeof(struct unbound_socket)); if(!ub_sock) return 0; + if(is_dnscrypt) { + udp_port_type = listen_type_udp_dnscrypt; + add = "dnscrypt"; + } else if(is_doq) { + udp_port_type = listen_type_doq; + add = "doq"; + if(((strchr(ifname, '@') && + atoi(strchr(ifname, '@')+1) == 53) || + (!strchr(ifname, '@') && atoi(port) == 53))) { + 
log_err("DNS over QUIC is not allowed on " + "port 53. Port 53 is for DNS " + "datagrams. Error for " + "interface '%s'.", ifname); + free(ub_sock->addr); + free(ub_sock); + return 0; + } + } else { + udp_port_type = listen_type_udp; + add = NULL; + } /* regular udp socket */ if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1, &noip6, rcv, snd, reuseport, transparent, - tcp_mss, nodelay, freebind, use_systemd, dscp, ub_sock)) == -1) { + tcp_mss, nodelay, freebind, use_systemd, dscp, ub_sock, + add)) == -1) { free(ub_sock->addr); free(ub_sock); if(noip6) { @@ -1338,14 +1397,25 @@ ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp, } return 0; } - if (sock_queue_timeout && !set_recvtimestamp(s)) { - log_warn("socket timestamping is not available"); + if(udp_port_type == listen_type_doq) { + if(!set_recvpktinfo(s, hints->ai_family)) { + sock_close(s); + free(ub_sock->addr); + free(ub_sock); + return 0; + } } - if(!port_insert(list, s, is_dnscrypt - ?listen_type_udp_dnscrypt : - (sock_queue_timeout ? 
- listen_type_udpancil:listen_type_udp), - is_pp2, ub_sock)) { + if(udp_port_type == listen_type_udp && sock_queue_timeout) + udp_port_type = listen_type_udpancil; + if (sock_queue_timeout) { + if(!set_recvtimestamp(s)) { + log_warn("socket timestamping is not available"); + } else { + if(udp_port_type == listen_type_udp) + udp_port_type = listen_type_udpancil; + } + } + if(!port_insert(list, s, udp_port_type, is_pp2, ub_sock)) { sock_close(s); free(ub_sock->addr); free(ub_sock); @@ -1359,17 +1429,24 @@ ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp, ub_sock = calloc(1, sizeof(struct unbound_socket)); if(!ub_sock) return 0; - if(is_ssl) + if(is_ssl) { port_type = listen_type_ssl; - else if(is_https) + add = "tls"; + } else if(is_https) { port_type = listen_type_http; - else if(is_dnscrypt) + add = "https"; + if(http_notls_downstream) + add = "http"; + } else if(is_dnscrypt) { port_type = listen_type_tcp_dnscrypt; - else + add = "dnscrypt"; + } else { port_type = listen_type_tcp; + add = NULL; + } if((s = make_sock_port(SOCK_STREAM, ifname, port, hints, 1, &noip6, 0, 0, reuseport, transparent, tcp_mss, nodelay, - freebind, use_systemd, dscp, ub_sock)) == -1) { + freebind, use_systemd, dscp, ub_sock, add)) == -1) { free(ub_sock->addr); free(ub_sock); if(noip6) { @@ -1446,8 +1523,10 @@ listen_create(struct comm_base* base, struct listen_port* ports, size_t bufsize, int tcp_accept_count, int tcp_idle_timeout, int harden_large_queries, uint32_t http_max_streams, char* http_endpoint, int http_notls, struct tcl_list* tcp_conn_limit, - void* sslctx, struct dt_env* dtenv, comm_point_callback_type* cb, - void *cb_arg) + void* sslctx, struct dt_env* dtenv, struct doq_table* doq_table, + struct ub_randstate* rnd, const char* ssl_service_key, + const char* ssl_service_pem, struct config_file* cfg, + comm_point_callback_type* cb, void *cb_arg) { struct listen_dnsport* front = (struct listen_dnsport*) malloc(sizeof(struct listen_dnsport)); @@ -1471,6 
+1550,16 @@ listen_create(struct comm_base* base, struct listen_port* ports, cp = comm_point_create_udp(base, ports->fd, front->udp_buff, ports->pp2_enabled, cb, cb_arg, ports->socket); + } else if(ports->ftype == listen_type_doq) { +#ifndef HAVE_NGTCP2 + log_warn("Unbound is not compiled with " + "ngtcp2. This is required to use DNS " + "over QUIC."); +#endif + cp = comm_point_create_doq(base, ports->fd, + front->udp_buff, cb, cb_arg, ports->socket, + doq_table, rnd, ssl_service_key, + ssl_service_pem, cfg); } else if(ports->ftype == listen_type_tcp || ports->ftype == listen_type_tcp_dnscrypt) { cp = comm_point_create_tcp(base, ports->fd, @@ -1858,7 +1947,9 @@ listening_ports_open(struct config_file* cfg, char** ifs, int num_ifs, reuseport, cfg->ip_transparent, cfg->tcp_mss, cfg->ip_freebind, cfg->http_nodelay, cfg->use_systemd, - cfg->dnscrypt_port, cfg->ip_dscp, cfg->sock_queue_timeout)) { + cfg->dnscrypt_port, cfg->ip_dscp, + cfg->quic_port, cfg->http_notls_downstream, + cfg->sock_queue_timeout)) { listening_ports_free(list); return NULL; } @@ -1875,7 +1966,9 @@ listening_ports_open(struct config_file* cfg, char** ifs, int num_ifs, reuseport, cfg->ip_transparent, cfg->tcp_mss, cfg->ip_freebind, cfg->http_nodelay, cfg->use_systemd, - cfg->dnscrypt_port, cfg->ip_dscp, cfg->sock_queue_timeout)) { + cfg->dnscrypt_port, cfg->ip_dscp, + cfg->quic_port, cfg->http_notls_downstream, + cfg->sock_queue_timeout)) { listening_ports_free(list); return NULL; } @@ -1894,7 +1987,9 @@ listening_ports_open(struct config_file* cfg, char** ifs, int num_ifs, reuseport, cfg->ip_transparent, cfg->tcp_mss, cfg->ip_freebind, cfg->http_nodelay, cfg->use_systemd, - cfg->dnscrypt_port, cfg->ip_dscp, cfg->sock_queue_timeout)) { + cfg->dnscrypt_port, cfg->ip_dscp, + cfg->quic_port, cfg->http_notls_downstream, + cfg->sock_queue_timeout)) { listening_ports_free(list); return NULL; } @@ -1910,7 +2005,9 @@ listening_ports_open(struct config_file* cfg, char** ifs, int num_ifs, reuseport, 
cfg->ip_transparent, cfg->tcp_mss, cfg->ip_freebind, cfg->http_nodelay, cfg->use_systemd, - cfg->dnscrypt_port, cfg->ip_dscp, cfg->sock_queue_timeout)) { + cfg->dnscrypt_port, cfg->ip_dscp, + cfg->quic_port, cfg->http_notls_downstream, + cfg->sock_queue_timeout)) { listening_ports_free(list); return NULL; } @@ -1928,7 +2025,9 @@ listening_ports_open(struct config_file* cfg, char** ifs, int num_ifs, reuseport, cfg->ip_transparent, cfg->tcp_mss, cfg->ip_freebind, cfg->http_nodelay, cfg->use_systemd, - cfg->dnscrypt_port, cfg->ip_dscp, cfg->sock_queue_timeout)) { + cfg->dnscrypt_port, cfg->ip_dscp, + cfg->quic_port, cfg->http_notls_downstream, + cfg->sock_queue_timeout)) { listening_ports_free(list); return NULL; } @@ -1944,7 +2043,9 @@ listening_ports_open(struct config_file* cfg, char** ifs, int num_ifs, reuseport, cfg->ip_transparent, cfg->tcp_mss, cfg->ip_freebind, cfg->http_nodelay, cfg->use_systemd, - cfg->dnscrypt_port, cfg->ip_dscp, cfg->sock_queue_timeout)) { + cfg->dnscrypt_port, cfg->ip_dscp, + cfg->quic_port, cfg->http_notls_downstream, + cfg->sock_queue_timeout)) { listening_ports_free(list); return NULL; } @@ -3154,3 +3255,2365 @@ nghttp2_session_callbacks* http2_req_callbacks_create(void) return callbacks; } #endif /* HAVE_NGHTTP2 */ + +#ifdef HAVE_NGTCP2 +struct doq_table* +doq_table_create(struct config_file* cfg, struct ub_randstate* rnd) +{ + struct doq_table* table = calloc(1, sizeof(*table)); + if(!table) + return NULL; + table->idle_timeout = ((uint64_t)cfg->tcp_idle_timeout)* + NGTCP2_MILLISECONDS; + table->sv_scidlen = 16; + table->static_secret_len = 16; + table->static_secret = malloc(table->static_secret_len); + if(!table->static_secret) { + free(table); + return NULL; + } + doq_fill_rand(rnd, table->static_secret, table->static_secret_len); + table->conn_tree = rbtree_create(doq_conn_cmp); + if(!table->conn_tree) { + free(table->static_secret); + free(table); + return NULL; + } + table->conid_tree = rbtree_create(doq_conid_cmp); + 
if(!table->conid_tree) { + free(table->static_secret); + free(table->conn_tree); + free(table); + return NULL; + } + table->timer_tree = rbtree_create(doq_timer_cmp); + if(!table->timer_tree) { + free(table->static_secret); + free(table->conn_tree); + free(table->conid_tree); + free(table); + return NULL; + } + lock_rw_init(&table->lock); + lock_rw_init(&table->conid_lock); + lock_basic_init(&table->size_lock); + lock_protect(&table->lock, &table->static_secret, + sizeof(table->static_secret)); + lock_protect(&table->lock, &table->static_secret_len, + sizeof(table->static_secret_len)); + lock_protect(&table->lock, table->static_secret, + table->static_secret_len); + lock_protect(&table->lock, &table->sv_scidlen, + sizeof(table->sv_scidlen)); + lock_protect(&table->lock, &table->idle_timeout, + sizeof(table->idle_timeout)); + lock_protect(&table->lock, &table->conn_tree, sizeof(table->conn_tree)); + lock_protect(&table->lock, table->conn_tree, sizeof(*table->conn_tree)); + lock_protect(&table->conid_lock, table->conid_tree, + sizeof(*table->conid_tree)); + lock_protect(&table->lock, table->timer_tree, + sizeof(*table->timer_tree)); + lock_protect(&table->size_lock, &table->current_size, + sizeof(table->current_size)); + return table; +} + +/** delete elements from the connection tree */ +static void +conn_tree_del(rbnode_type* node, void* arg) +{ + struct doq_table* table = (struct doq_table*)arg; + struct doq_conn* conn; + if(!node) + return; + conn = (struct doq_conn*)node->key; + if(conn->timer.timer_in_list) { + /* Remove timer from list first, because finding the rbnode + * element of the setlist of same timeouts needs tree lookup. + * Edit the tree structure after that lookup. 
*/ + doq_timer_list_remove(conn->table, &conn->timer); + } + if(conn->timer.timer_in_tree) + doq_timer_tree_remove(conn->table, &conn->timer); + doq_table_quic_size_subtract(table, sizeof(*conn)+conn->key.dcidlen); + doq_conn_delete(conn, table); +} + +/** delete elements from the connection id tree */ +static void +conid_tree_del(rbnode_type* node, void* ATTR_UNUSED(arg)) +{ + if(!node) + return; + doq_conid_delete((struct doq_conid*)node->key); +} + +void +doq_table_delete(struct doq_table* table) +{ + if(!table) + return; + lock_rw_destroy(&table->lock); + free(table->static_secret); + if(table->conn_tree) { + traverse_postorder(table->conn_tree, conn_tree_del, table); + free(table->conn_tree); + } + lock_rw_destroy(&table->conid_lock); + if(table->conid_tree) { + /* The tree should be empty, because the doq_conn_delete calls + * above should have also removed their conid elements. */ + traverse_postorder(table->conid_tree, conid_tree_del, NULL); + free(table->conid_tree); + } + lock_basic_destroy(&table->size_lock); + if(table->timer_tree) { + /* The tree should be empty, because the conn_tree_del calls + * above should also have removed them. Also the doq_timer + * is part of the doq_conn struct, so is already freed. */ + free(table->timer_tree); + } + table->write_list_first = NULL; + table->write_list_last = NULL; + free(table); +} + +struct doq_timer* +doq_timer_find_time(struct doq_table* table, struct timeval* tv) +{ + struct doq_timer key; + struct rbnode_type* node; + memset(&key, 0, sizeof(key)); + key.time.tv_sec = tv->tv_sec; + key.time.tv_usec = tv->tv_usec; + node = rbtree_search(table->timer_tree, &key); + if(node) + return (struct doq_timer*)node->key; + return NULL; +} + +void +doq_timer_tree_remove(struct doq_table* table, struct doq_timer* timer) +{ + if(!timer->timer_in_tree) + return; + rbtree_delete(table->timer_tree, timer); + timer->timer_in_tree = 0; + /* This item could have more timers in the same set. 
*/ + if(timer->setlist_first) { + struct doq_timer* rb_timer = timer->setlist_first; + /* del first element from setlist */ + if(rb_timer->setlist_next) + rb_timer->setlist_next->setlist_prev = NULL; + else + timer->setlist_last = NULL; + timer->setlist_first = rb_timer->setlist_next; + rb_timer->setlist_prev = NULL; + rb_timer->setlist_next = NULL; + rb_timer->timer_in_list = 0; + /* insert it into the tree as new rb element */ + memset(&rb_timer->node, 0, sizeof(rb_timer->node)); + rb_timer->node.key = rb_timer; + rbtree_insert(table->timer_tree, &rb_timer->node); + rb_timer->timer_in_tree = 1; + /* the setlist, if any remainder, moves to the rb element */ + rb_timer->setlist_first = timer->setlist_first; + rb_timer->setlist_last = timer->setlist_last; + timer->setlist_first = NULL; + timer->setlist_last = NULL; + rb_timer->worker_doq_socket = timer->worker_doq_socket; + } + timer->worker_doq_socket = NULL; +} + +void +doq_timer_list_remove(struct doq_table* table, struct doq_timer* timer) +{ + struct doq_timer* rb_timer; + if(!timer->timer_in_list) + return; + /* The item in the rbtree has the list start and end. 
*/ + rb_timer = doq_timer_find_time(table, &timer->time); + if(rb_timer) { + if(timer->setlist_prev) + timer->setlist_prev->setlist_next = timer->setlist_next; + else + rb_timer->setlist_first = timer->setlist_next; + if(timer->setlist_next) + timer->setlist_next->setlist_prev = timer->setlist_prev; + else + rb_timer->setlist_last = timer->setlist_prev; + timer->setlist_prev = NULL; + timer->setlist_next = NULL; + } + timer->timer_in_list = 0; +} + +/** doq append timer to setlist */ +static void +doq_timer_list_append(struct doq_timer* rb_timer, struct doq_timer* timer) +{ + log_assert(timer->timer_in_list == 0); + timer->timer_in_list = 1; + timer->setlist_next = NULL; + timer->setlist_prev = rb_timer->setlist_last; + if(rb_timer->setlist_last) + rb_timer->setlist_last->setlist_next = timer; + else + rb_timer->setlist_first = timer; + rb_timer->setlist_last = timer; +} + +void +doq_timer_unset(struct doq_table* table, struct doq_timer* timer) +{ + if(timer->timer_in_list) { + /* Remove timer from list first, because finding the rbnode + * element of the setlist of same timeouts needs tree lookup. + * Edit the tree structure after that lookup. 
*/ + doq_timer_list_remove(table, timer); + } + if(timer->timer_in_tree) + doq_timer_tree_remove(table, timer); + timer->worker_doq_socket = NULL; +} + +void doq_timer_set(struct doq_table* table, struct doq_timer* timer, + struct doq_server_socket* worker_doq_socket, struct timeval* tv) +{ + struct doq_timer* rb_timer; + if(verbosity >= VERB_ALGO && timer->conn) { + char a[256]; + struct timeval rel; + addr_to_str((void*)&timer->conn->key.paddr.addr, + timer->conn->key.paddr.addrlen, a, sizeof(a)); + timeval_subtract(&rel, tv, worker_doq_socket->now_tv); + verbose(VERB_ALGO, "doq %s timer set %d.%6.6d in %d.%6.6d", + a, (int)tv->tv_sec, (int)tv->tv_usec, + (int)rel.tv_sec, (int)rel.tv_usec); + } + if(timer->timer_in_tree || timer->timer_in_list) { + if(timer->time.tv_sec == tv->tv_sec && + timer->time.tv_usec == tv->tv_usec) + return; /* already set on that time */ + doq_timer_unset(table, timer); + } + timer->time.tv_sec = tv->tv_sec; + timer->time.tv_usec = tv->tv_usec; + rb_timer = doq_timer_find_time(table, tv); + if(rb_timer) { + /* There is a timeout already with this value. Timer is + * added to the setlist. */ + doq_timer_list_append(rb_timer, timer); + } else { + /* There is no timeout with this value. Make timer a new + * tree element. 
*/ + memset(&timer->node, 0, sizeof(timer->node)); + timer->node.key = timer; + rbtree_insert(table->timer_tree, &timer->node); + timer->timer_in_tree = 1; + timer->setlist_first = NULL; + timer->setlist_last = NULL; + timer->worker_doq_socket = worker_doq_socket; + } +} + +struct doq_conn* +doq_conn_create(struct comm_point* c, struct doq_pkt_addr* paddr, + const uint8_t* dcid, size_t dcidlen, uint32_t version) +{ + struct doq_conn* conn = calloc(1, sizeof(*conn)); + if(!conn) + return NULL; + conn->node.key = conn; + conn->doq_socket = c->doq_socket; + conn->table = c->doq_socket->table; + memmove(&conn->key.paddr.addr, &paddr->addr, paddr->addrlen); + conn->key.paddr.addrlen = paddr->addrlen; + memmove(&conn->key.paddr.localaddr, &paddr->localaddr, + paddr->localaddrlen); + conn->key.paddr.localaddrlen = paddr->localaddrlen; + conn->key.paddr.ifindex = paddr->ifindex; + conn->key.dcid = memdup((void*)dcid, dcidlen); + if(!conn->key.dcid) { + free(conn); + return NULL; + } + conn->key.dcidlen = dcidlen; + conn->version = version; +#ifdef HAVE_NGTCP2_CCERR_DEFAULT + ngtcp2_ccerr_default(&conn->ccerr); +#else + ngtcp2_connection_close_error_default(&conn->last_error); +#endif + rbtree_init(&conn->stream_tree, &doq_stream_cmp); + conn->timer.conn = conn; + lock_basic_init(&conn->lock); + lock_protect(&conn->lock, &conn->key, sizeof(conn->key)); + lock_protect(&conn->lock, &conn->doq_socket, sizeof(conn->doq_socket)); + lock_protect(&conn->lock, &conn->table, sizeof(conn->table)); + lock_protect(&conn->lock, &conn->is_deleted, sizeof(conn->is_deleted)); + lock_protect(&conn->lock, &conn->version, sizeof(conn->version)); + lock_protect(&conn->lock, &conn->conn, sizeof(conn->conn)); + lock_protect(&conn->lock, &conn->conid_list, sizeof(conn->conid_list)); +#ifdef HAVE_NGTCP2_CCERR_DEFAULT + lock_protect(&conn->lock, &conn->ccerr, sizeof(conn->ccerr)); +#else + lock_protect(&conn->lock, &conn->last_error, sizeof(conn->last_error)); +#endif + lock_protect(&conn->lock, 
&conn->tls_alert, sizeof(conn->tls_alert)); + lock_protect(&conn->lock, &conn->ssl, sizeof(conn->ssl)); + lock_protect(&conn->lock, &conn->close_pkt, sizeof(conn->close_pkt)); + lock_protect(&conn->lock, &conn->close_pkt_len, sizeof(conn->close_pkt_len)); + lock_protect(&conn->lock, &conn->close_ecn, sizeof(conn->close_ecn)); + lock_protect(&conn->lock, &conn->stream_tree, sizeof(conn->stream_tree)); + lock_protect(&conn->lock, &conn->stream_write_first, sizeof(conn->stream_write_first)); + lock_protect(&conn->lock, &conn->stream_write_last, sizeof(conn->stream_write_last)); + lock_protect(&conn->lock, &conn->write_interest, sizeof(conn->write_interest)); + lock_protect(&conn->lock, &conn->on_write_list, sizeof(conn->on_write_list)); + lock_protect(&conn->lock, &conn->write_prev, sizeof(conn->write_prev)); + lock_protect(&conn->lock, &conn->write_next, sizeof(conn->write_next)); + return conn; +} + +/** delete stream tree node, the stream is stored in the node key */ +static void +stream_tree_del(rbnode_type* node, void* arg) +{ + struct doq_table* table = (struct doq_table*)arg; + struct doq_stream* stream; + if(!node) + return; + stream = (struct doq_stream*)node->key; + if(stream->in) + doq_table_quic_size_subtract(table, stream->inlen); + if(stream->out) + doq_table_quic_size_subtract(table, stream->outlen); + doq_table_quic_size_subtract(table, sizeof(*stream)); + doq_stream_delete(stream); +} + +void +doq_conn_delete(struct doq_conn* conn, struct doq_table* table) +{ + if(!conn) + return; + lock_basic_destroy(&conn->lock); + lock_rw_wrlock(&conn->table->conid_lock); + doq_conn_clear_conids(conn); + lock_rw_unlock(&conn->table->conid_lock); + ngtcp2_conn_del(conn->conn); + if(conn->stream_tree.count != 0) { + traverse_postorder(&conn->stream_tree, stream_tree_del, table); + } + free(conn->key.dcid); + SSL_free(conn->ssl); + free(conn->close_pkt); + free(conn); +} + +int +doq_conn_cmp(const void* key1, const void* key2) +{ + struct doq_conn* c = (struct doq_conn*)key1; + struct doq_conn* d = (struct 
doq_conn*)key2; + int r; + /* Compared in the order destination address, then + * local address, ifindex and then dcid. + * So that for a search for findlessorequal for the destination + * address will find connections to that address, with different + * dcids. + * Also a printout in sorted order prints the connections by IP + * address of destination, and then a number of them depending on the + * dcids. */ + if(c->key.paddr.addrlen != d->key.paddr.addrlen) { + if(c->key.paddr.addrlen < d->key.paddr.addrlen) + return -1; + return 1; + } + if((r=memcmp(&c->key.paddr.addr, &d->key.paddr.addr, + c->key.paddr.addrlen))!=0) + return r; + if(c->key.paddr.localaddrlen != d->key.paddr.localaddrlen) { + if(c->key.paddr.localaddrlen < d->key.paddr.localaddrlen) + return -1; + return 1; + } + if((r=memcmp(&c->key.paddr.localaddr, &d->key.paddr.localaddr, + c->key.paddr.localaddrlen))!=0) + return r; + if(c->key.paddr.ifindex != d->key.paddr.ifindex) { + if(c->key.paddr.ifindex < d->key.paddr.ifindex) + return -1; + return 1; + } + if(c->key.dcidlen != d->key.dcidlen) { + if(c->key.dcidlen < d->key.dcidlen) + return -1; + return 1; + } + if((r=memcmp(c->key.dcid, d->key.dcid, c->key.dcidlen))!=0) + return r; + return 0; +} + +int doq_conid_cmp(const void* key1, const void* key2) +{ + struct doq_conid* c = (struct doq_conid*)key1; + struct doq_conid* d = (struct doq_conid*)key2; + if(c->cidlen != d->cidlen) { + if(c->cidlen < d->cidlen) + return -1; + return 1; + } + return memcmp(c->cid, d->cid, c->cidlen); +} + +int doq_timer_cmp(const void* key1, const void* key2) +{ + struct doq_timer* e = (struct doq_timer*)key1; + struct doq_timer* f = (struct doq_timer*)key2; + if(e->time.tv_sec < f->time.tv_sec) + return -1; + if(e->time.tv_sec > f->time.tv_sec) + return 1; + if(e->time.tv_usec < f->time.tv_usec) + return -1; + if(e->time.tv_usec > f->time.tv_usec) + return 1; + return 0; +} + +int doq_stream_cmp(const void* key1, const void* key2) +{ + struct doq_stream* c = (struct 
doq_stream*)key1; + struct doq_stream* d = (struct doq_stream*)key2; + if(c->stream_id != d->stream_id) { + if(c->stream_id < d->stream_id) + return -1; + return 1; + } + return 0; +} + +/** doq store a local address in repinfo */ +static void +doq_repinfo_store_localaddr(struct comm_reply* repinfo, + struct doq_addr_storage* localaddr, socklen_t localaddrlen) +{ + /* use the pktinfo that we have for ancillary udp data otherwise, + * this saves space for a sockaddr */ + memset(&repinfo->pktinfo, 0, sizeof(repinfo->pktinfo)); + if(addr_is_ip6((void*)localaddr, localaddrlen)) { +#ifdef IPV6_PKTINFO + struct sockaddr_in6* sa6 = (struct sockaddr_in6*)localaddr; + memmove(&repinfo->pktinfo.v6info.ipi6_addr, + &sa6->sin6_addr, sizeof(struct in6_addr)); + repinfo->doq_srcport = sa6->sin6_port; +#endif + repinfo->srctype = 6; + } else { +#ifdef IP_PKTINFO + struct sockaddr_in* sa = (struct sockaddr_in*)localaddr; + memmove(&repinfo->pktinfo.v4info.ipi_addr, + &sa->sin_addr, sizeof(struct in_addr)); + repinfo->doq_srcport = sa->sin_port; +#elif defined(IP_RECVDSTADDR) + struct sockaddr_in* sa = (struct sockaddr_in*)localaddr; + memmove(&repinfo->pktinfo.v4addr, &sa->sin_addr, + sizeof(struct in_addr)); + repinfo->doq_srcport = sa->sin_port; +#endif + repinfo->srctype = 4; + } +} + +/** doq retrieve localaddr from repinfo, copies only the address bytes */ +static void +doq_repinfo_retrieve_localaddr(struct comm_reply* repinfo, + struct doq_addr_storage* localaddr, socklen_t* localaddrlen) +{ + if(repinfo->srctype == 6) { +#ifdef IPV6_PKTINFO + struct sockaddr_in6* sa6 = (struct sockaddr_in6*)localaddr; + *localaddrlen = (socklen_t)sizeof(struct sockaddr_in6); + memset(sa6, 0, *localaddrlen); + sa6->sin6_family = AF_INET6; + memmove(&sa6->sin6_addr, &repinfo->pktinfo.v6info.ipi6_addr, + sizeof(struct in6_addr)); + sa6->sin6_port = repinfo->doq_srcport; +#endif + } else { +#ifdef IP_PKTINFO + struct sockaddr_in* sa = (struct sockaddr_in*)localaddr; + *localaddrlen = (socklen_t)sizeof(struct sockaddr_in); + memset(sa, 
0, *localaddrlen); + sa->sin_family = AF_INET; + memmove(&sa->sin_addr, &repinfo->pktinfo.v4info.ipi_addr, + sizeof(struct in_addr)); + sa->sin_port = repinfo->doq_srcport; +#elif defined(IP_RECVDSTADDR) + struct sockaddr_in* sa = (struct sockaddr_in*)localaddr; + *localaddrlen = (socklen_t)sizeof(struct sockaddr_in); + memset(sa, 0, *localaddrlen); + sa->sin_family = AF_INET; + memmove(&sa->sin_addr, &repinfo->pktinfo.v4addr, + sizeof(struct in_addr)); + sa->sin_port = repinfo->doq_srcport; +#endif + } +} + +/** doq write a connection key into repinfo, false if it does not fit */ +static int +doq_conn_key_store_repinfo(struct doq_conn_key* key, + struct comm_reply* repinfo) +{ + repinfo->is_proxied = 0; + repinfo->doq_ifindex = key->paddr.ifindex; + repinfo->remote_addrlen = key->paddr.addrlen; + memmove(&repinfo->remote_addr, &key->paddr.addr, + repinfo->remote_addrlen); + repinfo->client_addrlen = key->paddr.addrlen; + memmove(&repinfo->client_addr, &key->paddr.addr, + repinfo->client_addrlen); + doq_repinfo_store_localaddr(repinfo, &key->paddr.localaddr, + key->paddr.localaddrlen); + if(key->dcidlen > sizeof(repinfo->doq_dcid)) + return 0; + repinfo->doq_dcidlen = key->dcidlen; + memmove(repinfo->doq_dcid, key->dcid, key->dcidlen); + return 1; +} + +void +doq_conn_key_from_repinfo(struct doq_conn_key* key, struct comm_reply* repinfo) +{ + key->paddr.ifindex = repinfo->doq_ifindex; + key->paddr.addrlen = repinfo->remote_addrlen; + memmove(&key->paddr.addr, &repinfo->remote_addr, + repinfo->remote_addrlen); + doq_repinfo_retrieve_localaddr(repinfo, &key->paddr.localaddr, + &key->paddr.localaddrlen); + key->dcidlen = repinfo->doq_dcidlen; + key->dcid = repinfo->doq_dcid; +} + +/** doq add a stream to the connection */ +static void +doq_conn_add_stream(struct doq_conn* conn, struct doq_stream* stream) +{ + (void)rbtree_insert(&conn->stream_tree, &stream->node); +} + +/** doq delete a stream from the connection */ +static void +doq_conn_del_stream(struct doq_conn* conn, 
struct doq_stream* stream)
{
	(void)rbtree_delete(&conn->stream_tree, &stream->node);
}

/** doq create new stream. Returns a zeroed stream with its tree node key
 * pointing at itself, or NULL on allocation failure. */
static struct doq_stream*
doq_stream_create(int64_t stream_id)
{
	struct doq_stream* stream = calloc(1, sizeof(*stream));
	if(!stream)
		return NULL;
	stream->node.key = stream;
	stream->stream_id = stream_id;
	return stream;
}

/* Free the stream and its in and out buffers. NULL is accepted.
 * Does not touch the connection tree or the table size accounting. */
void doq_stream_delete(struct doq_stream* stream)
{
	if(!stream)
		return;
	free(stream->in);
	free(stream->out);
	free(stream);
}

/* Look up a stream by id in the connection's stream tree, NULL if absent. */
struct doq_stream*
doq_stream_find(struct doq_conn* conn, int64_t stream_id)
{
	rbnode_type* node;
	/* stack key, only the fields used for the lookup are set */
	struct doq_stream key;
	key.node.key = &key;
	key.stream_id = stream_id;
	node = rbtree_search(&conn->stream_tree, &key);
	if(node)
		return (struct doq_stream*)node->key;
	return NULL;
}

/** doq put stream on the conn write list. Appends at the tail; no-op if
 * the stream is already on the list. */
static void
doq_stream_on_write_list(struct doq_conn* conn, struct doq_stream* stream)
{
	if(stream->on_write_list)
		return;
	stream->write_prev = conn->stream_write_last;
	if(conn->stream_write_last)
		conn->stream_write_last->write_next = stream;
	else
		conn->stream_write_first = stream;
	conn->stream_write_last = stream;
	stream->write_next = NULL;
	stream->on_write_list = 1;
}

/** doq remove stream from the conn write list. No-op if it is not on the
 * list. */
static void
doq_stream_off_write_list(struct doq_conn* conn, struct doq_stream* stream)
{
	if(!stream->on_write_list)
		return;
	if(stream->write_next)
		stream->write_next->write_prev = stream->write_prev;
	else	conn->stream_write_last = stream->write_prev;
	if(stream->write_prev)
		stream->write_prev->write_next = stream->write_next;
	else	conn->stream_write_first = stream->write_next;
	stream->write_prev = NULL;
	stream->write_next = NULL;
	stream->on_write_list = 0;
}

/** doq stream remove in buffer, and subtract its size from the table */
static void
doq_stream_remove_in_buffer(struct doq_stream* stream, struct doq_table* table)
{
	if(stream->in) {
		doq_table_quic_size_subtract(table, stream->inlen);
		free(stream->in);
		stream->in = NULL;
		stream->inlen = 0;
	}
}

/** doq stream remove out buffer, and subtract its size from the table */
static void
doq_stream_remove_out_buffer(struct doq_stream* stream,
	struct doq_table* table)
{
	if(stream->out) {
		doq_table_quic_size_subtract(table, stream->outlen);
		free(stream->out);
		stream->out = NULL;
		stream->outlen = 0;
	}
}

/* Close a stream: take it off the write list, optionally tell ngtcp2 to
 * shut the stream down, release the buffers and size accounting, remove it
 * from the connection and free it.
 * Returns 1 on success (also when the stream was already marked closed),
 * 0 if ngtcp2_conn_shutdown_stream fails. In that failure case the stream
 * has been marked closed but is neither removed from the tree nor freed. */
int
doq_stream_close(struct doq_conn* conn, struct doq_stream* stream,
	int send_shutdown)
{
	int ret;
	if(stream->is_closed)
		return 1;
	stream->is_closed = 1;
	doq_stream_off_write_list(conn, stream);
	if(send_shutdown) {
		verbose(VERB_ALGO, "doq: shutdown stream_id %d with app_error_code %d",
			(int)stream->stream_id, (int)DOQ_APP_ERROR_CODE);
		ret = ngtcp2_conn_shutdown_stream(conn->conn,
#ifdef HAVE_NGTCP2_CONN_SHUTDOWN_STREAM4
			0,
#endif
			stream->stream_id, DOQ_APP_ERROR_CODE);
		if(ret != 0) {
			log_err("doq ngtcp2_conn_shutdown_stream %d failed: %s",
				(int)stream->stream_id, ngtcp2_strerror(ret));
			return 0;
		}
		doq_conn_write_enable(conn);
	}
	doq_stream_remove_in_buffer(stream, conn->doq_socket->table);
	doq_stream_remove_out_buffer(stream, conn->doq_socket->table);
	doq_table_quic_size_subtract(conn->doq_socket->table, sizeof(*stream));
	doq_conn_del_stream(conn, stream);
	doq_stream_delete(stream);
	return 1;
}

/** doq stream pick up answer data from buffer. Replaces any previous out
 * buffer with a copy of buf. Returns 0 on allocation failure; in that case
 * outlen has already been set while out is NULL. This routine does no
 * table size accounting, the caller does that. */
static int
doq_stream_pickup_answer(struct doq_stream* stream, struct sldns_buffer* buf)
{
	stream->is_answer_available = 1;
	if(stream->out) {
		free(stream->out);
		stream->out = NULL;
		stream->outlen = 0;
	}
	stream->nwrite = 0;
	stream->outlen = sldns_buffer_limit(buf);
	/* For quic the output bytes have to stay allocated and available,
	 * for potential resends, until the remote end has acknowledged them.
	 * This includes the tcplen start uint16_t, in outlen_wire. */
	stream->outlen_wire = htons(stream->outlen);
	stream->out = memdup(sldns_buffer_begin(buf), sldns_buffer_limit(buf));
	if(!stream->out) {
		log_err("doq could not send answer: out of memory");
		return 0;
	}
	return 1;
}

/* Queue the reply in buf for sending on the stream: copy it into the
 * stream's out buffer, update the table size accounting, put the stream on
 * the write list and enable writing on the connection.
 * Returns 0 if the copy could not be allocated. */
int
doq_stream_send_reply(struct doq_conn* conn, struct doq_stream* stream,
	struct sldns_buffer* buf)
{
	if(verbosity >= VERB_ALGO) {
		char* s = sldns_wire2str_pkt(sldns_buffer_begin(buf),
			sldns_buffer_limit(buf));
		verbose(VERB_ALGO, "doq stream %d response\n%s",
			(int)stream->stream_id, (s?s:"null"));
		free(s);
	}
	/* the old out buffer is freed by pickup_answer, uncount it here */
	if(stream->out)
		doq_table_quic_size_subtract(conn->doq_socket->table,
			stream->outlen);
	if(!doq_stream_pickup_answer(stream, buf))
		return 0;
	doq_table_quic_size_add(conn->doq_socket->table, stream->outlen);
	doq_stream_on_write_list(conn, stream);
	doq_conn_write_enable(conn);
	return 1;
}

/** doq stream data length has completed, allocations can be done. False on
 * allocation failure. Also false if inlen exceeds 1 MiB. On success the
 * zeroed in buffer of inlen bytes is counted in the table size. */
static int
doq_stream_datalen_complete(struct doq_stream* stream, struct doq_table* table)
{
	if(stream->inlen > 1024*1024) {
		log_err("doq stream in length too large %d",
			(int)stream->inlen);
		return 0;
	}
	stream->in = calloc(1, stream->inlen);
	if(!stream->in) {
		log_err("doq could not read stream, calloc failed: "
			"out of memory");
		return 0;
	}
	doq_table_quic_size_add(table, stream->inlen);
	return 1;
}

/** doq stream data is complete, the input data has been received.
*/
/* Copies the query into the comm point buffer, fills in the reply info and
 * calls the comm point callback. If the callback returns nonzero, the
 * contents of the comm point buffer are queued as the reply on the stream.
 * Returns 0 on failure, 1 otherwise. */
static int
doq_stream_data_complete(struct doq_conn* conn, struct doq_stream* stream)
{
	struct comm_point* c;
	if(verbosity >= VERB_ALGO) {
		char* s = sldns_wire2str_pkt(stream->in, stream->inlen);
		char a[128];
		addr_to_str((void*)&conn->key.paddr.addr,
			conn->key.paddr.addrlen, a, sizeof(a));
		verbose(VERB_ALGO, "doq %s stream %d incoming query\n%s",
			a, (int)stream->stream_id, (s?s:"null"));
		free(s);
	}
	stream->is_query_complete = 1;
	c = conn->doq_socket->cp;
	if(!stream->in) {
		verbose(VERB_ALGO, "doq_stream_data_complete: no in buffer");
		return 0;
	}
	if(stream->inlen > sldns_buffer_capacity(c->buffer)) {
		verbose(VERB_ALGO, "doq_stream_data_complete: query too long");
		return 0;
	}
	sldns_buffer_clear(c->buffer);
	sldns_buffer_write(c->buffer, stream->in, stream->inlen);
	sldns_buffer_flip(c->buffer);
	c->repinfo.c = c;
	if(!doq_conn_key_store_repinfo(&conn->key, &c->repinfo)) {
		verbose(VERB_ALGO, "doq_stream_data_complete: connection "
			"DCID too long");
		return 0;
	}
	c->repinfo.doq_streamid = stream->stream_id;
	/* current_conn is set only for the duration of the callback */
	conn->doq_socket->current_conn = conn;
	fptr_ok(fptr_whitelist_comm_point(c->callback));
	if( (*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &c->repinfo)) {
		conn->doq_socket->current_conn = NULL;
		if(!doq_stream_send_reply(conn, stream, c->buffer)) {
			verbose(VERB_ALGO, "doq: failed to send_reply");
			return 0;
		}
		return 1;
	}
	conn->doq_socket->current_conn = NULL;
	return 1;
}

/** doq receive data for a stream, more bytes of the incoming data.
 * nread counts all bytes consumed so far, including the two byte length
 * prefix. Sets *recv_done to 1 once the payload is complete; *recv_done is
 * never cleared here. Returns 0 on failure. */
static int
doq_stream_recv_data(struct doq_stream* stream, const uint8_t* data,
	size_t datalen, int* recv_done, struct doq_table* table)
{
	int got_data = 0;
	/* read the tcplength uint16_t at the start */
	if(stream->nread < 2) {
		uint16_t tcplen = 0;
		size_t todolen = 2 - stream->nread;

		if(stream->nread > 0) {
			/* put in the already read byte if there is one */
			tcplen = stream->inlen;
		}
		if(datalen < todolen)
			todolen = datalen;
		/* tcplen is assembled bytewise in network order */
		memmove(((uint8_t*)&tcplen)+stream->nread, data, todolen);
		stream->nread += todolen;
		data += todolen;
		datalen -= todolen;
		if(stream->nread == 2) {
			/* the initial length value is completed */
			stream->inlen = ntohs(tcplen);
			if(!doq_stream_datalen_complete(stream, table))
				return 0;
		} else {
			/* store for later */
			stream->inlen = tcplen;
			return 1;
		}
	}
	/* if there are more data bytes */
	if(datalen > 0) {
		size_t to_write = datalen;
		if(stream->nread-2 > stream->inlen) {
			verbose(VERB_ALGO, "doq stream buffer too small");
			return 0;
		}
		/* clamp to the space that remains in the in buffer */
		if(datalen > stream->inlen - (stream->nread-2))
			to_write = stream->inlen - (stream->nread-2);
		if(to_write > 0) {
			if(!stream->in) {
				verbose(VERB_ALGO, "doq: stream has "
					"no buffer");
				return 0;
			}
			memmove(stream->in+(stream->nread-2), data, to_write);
			stream->nread += to_write;
			data += to_write;
			datalen -= to_write;
			got_data = 1;
		}
	}
	/* Are there extra bytes received after the end? If so, log them. */
	if(datalen > 0) {
		if(verbosity >= VERB_ALGO)
			log_hex("doq stream has extra bytes received after end",
				(void*)data, datalen);
	}
	/* Is the input data complete? */
	if(got_data && stream->nread >= stream->inlen+2) {
		if(!stream->in) {
			verbose(VERB_ALGO, "doq: completed stream has "
				"no buffer");
			return 0;
		}
		*recv_done = 1;
	}
	return 1;
}

/** doq receive FIN for a stream. No more bytes are going to arrive.
*/ +static int +doq_stream_recv_fin(struct doq_conn* conn, struct doq_stream* stream, int + recv_done) +{ + if(!stream->is_query_complete && !recv_done) { + verbose(VERB_ALGO, "doq: stream recv FIN, but is " + "not complete, have %d of %d bytes", + ((int)stream->nread)-2, (int)stream->inlen); + if(!doq_stream_close(conn, stream, 1)) + return 0; + } + return 1; +} + +void doq_fill_rand(struct ub_randstate* rnd, uint8_t* buf, size_t len) +{ + size_t i; + for(i=0; idoq_socket->rnd, data, datalen); + if(!doq_conid_find(conn->table, data, datalen)) { + /* Found an unused connection id. */ + return 1; + } + } + verbose(VERB_ALGO, "doq_conn_generate_new_conid failed: could not " + "generate random unused connection id value in %d attempts.", + max_try); + return 0; +} + +/** ngtcp2 rand callback function */ +static void +doq_rand_cb(uint8_t* dest, size_t destlen, const ngtcp2_rand_ctx* rand_ctx) +{ + struct ub_randstate* rnd = (struct ub_randstate*) + rand_ctx->native_handle; + doq_fill_rand(rnd, dest, destlen); +} + +/** ngtcp2 get_new_connection_id callback function */ +static int +doq_get_new_connection_id_cb(ngtcp2_conn* ATTR_UNUSED(conn), ngtcp2_cid* cid, + uint8_t* token, size_t cidlen, void* user_data) +{ + struct doq_conn* doq_conn = (struct doq_conn*)user_data; + /* Lock the conid tree, so we can check for duplicates while + * generating the id, and then insert it, whilst keeping the tree + * locked against other modifications, guaranteeing uniqueness. 
*/ + lock_rw_wrlock(&doq_conn->table->conid_lock); + if(!doq_conn_generate_new_conid(doq_conn, cid->data, cidlen)) { + lock_rw_unlock(&doq_conn->table->conid_lock); + return NGTCP2_ERR_CALLBACK_FAILURE; + } + cid->datalen = cidlen; + if(ngtcp2_crypto_generate_stateless_reset_token(token, + doq_conn->doq_socket->static_secret, + doq_conn->doq_socket->static_secret_len, cid) != 0) { + lock_rw_unlock(&doq_conn->table->conid_lock); + return NGTCP2_ERR_CALLBACK_FAILURE; + } + if(!doq_conn_associate_conid(doq_conn, cid->data, cid->datalen)) { + lock_rw_unlock(&doq_conn->table->conid_lock); + return NGTCP2_ERR_CALLBACK_FAILURE; + } + lock_rw_unlock(&doq_conn->table->conid_lock); + return 0; +} + +/** ngtcp2 remove_connection_id callback function */ +static int +doq_remove_connection_id_cb(ngtcp2_conn* ATTR_UNUSED(conn), + const ngtcp2_cid* cid, void* user_data) +{ + struct doq_conn* doq_conn = (struct doq_conn*)user_data; + lock_rw_wrlock(&doq_conn->table->conid_lock); + doq_conn_dissociate_conid(doq_conn, cid->data, cid->datalen); + lock_rw_unlock(&doq_conn->table->conid_lock); + return 0; +} + +/** doq submit a new token */ +static int +doq_submit_new_token(struct doq_conn* conn) +{ + uint8_t token[NGTCP2_CRYPTO_MAX_REGULAR_TOKENLEN]; + ngtcp2_ssize tokenlen; + int ret; + const ngtcp2_path* path = ngtcp2_conn_get_path(conn->conn); + ngtcp2_tstamp ts = doq_get_timestamp_nanosec(); + + tokenlen = ngtcp2_crypto_generate_regular_token(token, + conn->doq_socket->static_secret, + conn->doq_socket->static_secret_len, path->remote.addr, + path->remote.addrlen, ts); + if(tokenlen < 0) { + log_err("doq ngtcp2_crypto_generate_regular_token failed"); + return 1; + } + + verbose(VERB_ALGO, "doq submit new token"); + ret = ngtcp2_conn_submit_new_token(conn->conn, token, tokenlen); + if(ret != 0) { + log_err("doq ngtcp2_conn_submit_new_token failed: %s", + ngtcp2_strerror(ret)); + return 0; + } + return 1; +} + +/** ngtcp2 handshake_completed callback function */ +static int 
+doq_handshake_completed_cb(ngtcp2_conn* ATTR_UNUSED(conn), void* user_data) +{ + struct doq_conn* doq_conn = (struct doq_conn*)user_data; + verbose(VERB_ALGO, "doq handshake_completed callback"); + verbose(VERB_ALGO, "ngtcp2_conn_get_max_data_left is %d", + (int)ngtcp2_conn_get_max_data_left(doq_conn->conn)); +#ifdef HAVE_NGTCP2_CONN_GET_MAX_LOCAL_STREAMS_UNI + verbose(VERB_ALGO, "ngtcp2_conn_get_max_local_streams_uni is %d", + (int)ngtcp2_conn_get_max_local_streams_uni(doq_conn->conn)); +#endif + verbose(VERB_ALGO, "ngtcp2_conn_get_streams_uni_left is %d", + (int)ngtcp2_conn_get_streams_uni_left(doq_conn->conn)); + verbose(VERB_ALGO, "ngtcp2_conn_get_streams_bidi_left is %d", + (int)ngtcp2_conn_get_streams_bidi_left(doq_conn->conn)); + verbose(VERB_ALGO, "negotiated cipher name is %s", + SSL_get_cipher_name(doq_conn->ssl)); + if(verbosity > VERB_ALGO) { + const unsigned char* alpn = NULL; + unsigned int alpnlen = 0; + char alpnstr[128]; + SSL_get0_alpn_selected(doq_conn->ssl, &alpn, &alpnlen); + if(alpnlen > sizeof(alpnstr)-1) + alpnlen = sizeof(alpnstr)-1; + memmove(alpnstr, alpn, alpnlen); + alpnstr[alpnlen]=0; + verbose(VERB_ALGO, "negotiated ALPN is '%s'", alpnstr); + } + + if(!doq_submit_new_token(doq_conn)) + return -1; + return 0; +} + +/** ngtcp2 stream_open callback function */ +static int +doq_stream_open_cb(ngtcp2_conn* ATTR_UNUSED(conn), int64_t stream_id, + void* user_data) +{ + struct doq_conn* doq_conn = (struct doq_conn*)user_data; + struct doq_stream* stream; + verbose(VERB_ALGO, "doq new stream %x", (int)stream_id); + if(doq_stream_find(doq_conn, stream_id)) { + verbose(VERB_ALGO, "doq: stream with this id already exists"); + return 0; + } + if(stream_id != 0 && stream_id != 4 && /* allow one stream on a new connection */ + !doq_table_quic_size_available(doq_conn->doq_socket->table, + doq_conn->doq_socket->cfg, sizeof(*stream) + + 100 /* estimated query in */ + + 512 /* estimated response out */ + )) { + int rv; + verbose(VERB_ALGO, "doq: no mem 
for new stream"); + rv = ngtcp2_conn_shutdown_stream(doq_conn->conn, +#ifdef HAVE_NGTCP2_CONN_SHUTDOWN_STREAM4 + 0, +#endif + stream_id, NGTCP2_CONNECTION_REFUSED); + if(rv != 0) { + log_err("ngtcp2_conn_shutdown_stream failed: %s", + ngtcp2_strerror(rv)); + return NGTCP2_ERR_CALLBACK_FAILURE; + } + return 0; + } + stream = doq_stream_create(stream_id); + if(!stream) { + log_err("doq: could not doq_stream_create: out of memory"); + return NGTCP2_ERR_CALLBACK_FAILURE; + } + doq_table_quic_size_add(doq_conn->doq_socket->table, sizeof(*stream)); + doq_conn_add_stream(doq_conn, stream); + return 0; +} + +/** ngtcp2 recv_stream_data callback function */ +static int +doq_recv_stream_data_cb(ngtcp2_conn* ATTR_UNUSED(conn), uint32_t flags, + int64_t stream_id, uint64_t offset, const uint8_t* data, + size_t datalen, void* user_data, void* ATTR_UNUSED(stream_user_data)) +{ + int recv_done = 0; + struct doq_conn* doq_conn = (struct doq_conn*)user_data; + struct doq_stream* stream; + verbose(VERB_ALGO, "doq recv stream data stream id %d offset %d " + "datalen %d%s%s", (int)stream_id, (int)offset, (int)datalen, + ((flags&NGTCP2_STREAM_DATA_FLAG_FIN)!=0?" FIN":""), +#ifdef NGTCP2_STREAM_DATA_FLAG_0RTT + ((flags&NGTCP2_STREAM_DATA_FLAG_0RTT)!=0?" 0RTT":"") +#else + ((flags&NGTCP2_STREAM_DATA_FLAG_EARLY)!=0?" 
EARLY":"") +#endif + ); + stream = doq_stream_find(doq_conn, stream_id); + if(!stream) { + verbose(VERB_ALGO, "doq: received stream data for " + "unknown stream %d", (int)stream_id); + return 0; + } + if(stream->is_closed) { + verbose(VERB_ALGO, "doq: stream is closed, ignore recv data"); + return 0; + } + if(datalen != 0) { + if(!doq_stream_recv_data(stream, data, datalen, &recv_done, + doq_conn->doq_socket->table)) + return NGTCP2_ERR_CALLBACK_FAILURE; + } + if((flags&NGTCP2_STREAM_DATA_FLAG_FIN)!=0) { + if(!doq_stream_recv_fin(doq_conn, stream, recv_done)) + return NGTCP2_ERR_CALLBACK_FAILURE; + } + ngtcp2_conn_extend_max_stream_offset(doq_conn->conn, stream_id, + datalen); + ngtcp2_conn_extend_max_offset(doq_conn->conn, datalen); + if(recv_done) { + if(!doq_stream_data_complete(doq_conn, stream)) + return NGTCP2_ERR_CALLBACK_FAILURE; + } + return 0; +} + +/** ngtcp2 stream_close callback function */ +static int +doq_stream_close_cb(ngtcp2_conn* ATTR_UNUSED(conn), uint32_t flags, + int64_t stream_id, uint64_t app_error_code, void* user_data, + void* ATTR_UNUSED(stream_user_data)) +{ + struct doq_conn* doq_conn = (struct doq_conn*)user_data; + struct doq_stream* stream; + if((flags&NGTCP2_STREAM_CLOSE_FLAG_APP_ERROR_CODE_SET)!=0) + verbose(VERB_ALGO, "doq stream close for stream id %d %sapp_error_code %d", + (int)stream_id, + (((flags&NGTCP2_STREAM_CLOSE_FLAG_APP_ERROR_CODE_SET)!=0)? 
+ "APP_ERROR_CODE_SET ":""), + (int)app_error_code); + else + verbose(VERB_ALGO, "doq stream close for stream id %d", + (int)stream_id); + + stream = doq_stream_find(doq_conn, stream_id); + if(!stream) { + verbose(VERB_ALGO, "doq: stream close for " + "unknown stream %d", (int)stream_id); + return 0; + } + if(!doq_stream_close(doq_conn, stream, 0)) + return NGTCP2_ERR_CALLBACK_FAILURE; + return 0; +} + +/** ngtcp2 stream_reset callback function */ +static int +doq_stream_reset_cb(ngtcp2_conn* ATTR_UNUSED(conn), int64_t stream_id, + uint64_t final_size, uint64_t app_error_code, void* user_data, + void* ATTR_UNUSED(stream_user_data)) +{ + struct doq_conn* doq_conn = (struct doq_conn*)user_data; + struct doq_stream* stream; + verbose(VERB_ALGO, "doq stream reset for stream id %d final_size %d " + "app_error_code %d", (int)stream_id, (int)final_size, + (int)app_error_code); + + stream = doq_stream_find(doq_conn, stream_id); + if(!stream) { + verbose(VERB_ALGO, "doq: stream reset for " + "unknown stream %d", (int)stream_id); + return 0; + } + if(!doq_stream_close(doq_conn, stream, 0)) + return NGTCP2_ERR_CALLBACK_FAILURE; + return 0; +} + +/** ngtcp2 acked_stream_data_offset callback function */ +static int +doq_acked_stream_data_offset_cb(ngtcp2_conn* ATTR_UNUSED(conn), + int64_t stream_id, uint64_t offset, uint64_t datalen, void* user_data, + void* ATTR_UNUSED(stream_user_data)) +{ + struct doq_conn* doq_conn = (struct doq_conn*)user_data; + struct doq_stream* stream; + verbose(VERB_ALGO, "doq stream acked data for stream id %d offset %d " + "datalen %d", (int)stream_id, (int)offset, (int)datalen); + + stream = doq_stream_find(doq_conn, stream_id); + if(!stream) { + verbose(VERB_ALGO, "doq: stream acked data for " + "unknown stream %d", (int)stream_id); + return 0; + } + /* Acked the data from [offset .. offset+datalen). 
*/
	if(stream->is_closed)
		return 0;
	/* the acked range reaches the end of the out buffer: the stream's
	 * in and out buffers are released */
	if(offset+datalen >= stream->outlen) {
		doq_stream_remove_in_buffer(stream,
			doq_conn->doq_socket->table);
		doq_stream_remove_out_buffer(stream,
			doq_conn->doq_socket->table);
	}
	return 0;
}

/** ngtcp2 log_printf callback function. Formats the message into a
 * 1024 byte buffer (longer messages are truncated by vsnprintf) and logs
 * it at VERB_ALGO. */
static void
doq_log_printf_cb(void* ATTR_UNUSED(user_data), const char* fmt, ...)
{
	char buf[1024];
	va_list ap;
	va_start(ap, fmt);
	vsnprintf(buf, sizeof(buf), fmt, ap);
	verbose(VERB_ALGO, "libngtcp2: %s", buf);
	va_end(ap);
}

/* The functions below, up to the matching #endif, are only compiled when
 * ngtcp2_crypto_quictls_configure_server_context is not available. */
#ifndef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT
/** the doq application tx key callback, false on failure.
 * It only logs connection limits and always returns 1. */
static int
doq_application_tx_key_cb(struct doq_conn* conn)
{
	verbose(VERB_ALGO, "doq application tx key cb");
	/* The server does not want to open streams to the client,
	 * the client instead initiates by opening bidi streams. */
	verbose(VERB_ALGO, "doq ngtcp2_conn_get_max_data_left is %d",
		(int)ngtcp2_conn_get_max_data_left(conn->conn));
#ifdef HAVE_NGTCP2_CONN_GET_MAX_LOCAL_STREAMS_UNI
	verbose(VERB_ALGO, "doq ngtcp2_conn_get_max_local_streams_uni is %d",
		(int)ngtcp2_conn_get_max_local_streams_uni(conn->conn));
#endif
	verbose(VERB_ALGO, "doq ngtcp2_conn_get_streams_uni_left is %d",
		(int)ngtcp2_conn_get_streams_uni_left(conn->conn));
	verbose(VERB_ALGO, "doq ngtcp2_conn_get_streams_bidi_left is %d",
		(int)ngtcp2_conn_get_streams_bidi_left(conn->conn));
	return 1;
}

/** quic_method set_encryption_secrets function.
 * Installs the rx and/or tx keys derived from the given secrets into the
 * ngtcp2 connection. Returns 1 on success, 0 on failure. */
static int
doq_set_encryption_secrets(SSL *ssl, OSSL_ENCRYPTION_LEVEL ossl_level,
	const uint8_t *read_secret, const uint8_t *write_secret,
	size_t secret_len)
{
	struct doq_conn* doq_conn = (struct doq_conn*)SSL_get_app_data(ssl);
	/* the type and the conversion function name depend on the ngtcp2
	 * version; together these lines declare and initialize 'level' */
#ifdef HAVE_NGTCP2_ENCRYPTION_LEVEL
	ngtcp2_encryption_level
#else
	ngtcp2_crypto_level
#endif
		level =
#ifdef HAVE_NGTCP2_CRYPTO_QUICTLS_FROM_OSSL_ENCRYPTION_LEVEL
		ngtcp2_crypto_quictls_from_ossl_encryption_level(ossl_level);
#else
		ngtcp2_crypto_openssl_from_ossl_encryption_level(ossl_level);
#endif

	if(read_secret) {
		verbose(VERB_ALGO, "doq: ngtcp2_crypto_derive_and_install_rx_key for level %d ossl %d", (int)level, (int)ossl_level);
		if(ngtcp2_crypto_derive_and_install_rx_key(doq_conn->conn,
			NULL, NULL, NULL, level, read_secret, secret_len)
			!= 0) {
			log_err("ngtcp2_crypto_derive_and_install_rx_key "
				"failed");
			return 0;
		}
	}

	if(write_secret) {
		verbose(VERB_ALGO, "doq: ngtcp2_crypto_derive_and_install_tx_key for level %d ossl %d", (int)level, (int)ossl_level);
		if(ngtcp2_crypto_derive_and_install_tx_key(doq_conn->conn,
			NULL, NULL, NULL, level, write_secret, secret_len)
			!= 0) {
			log_err("ngtcp2_crypto_derive_and_install_tx_key "
				"failed");
			return 0;
		}
		/* NOTE(review): this constant belongs to the
		 * ngtcp2_crypto_level naming; confirm it is also defined
		 * when HAVE_NGTCP2_ENCRYPTION_LEVEL is set. */
		if(level == NGTCP2_CRYPTO_LEVEL_APPLICATION) {
			if(!doq_application_tx_key_cb(doq_conn))
				return 0;
		}
	}
	return 1;
}

/** quic_method add_handshake_data function.
 * Submits the TLS handshake bytes to ngtcp2 as crypto data; on failure
 * the error is recorded on the connection with
 * ngtcp2_conn_set_tls_error and 0 is returned. */
static int
doq_add_handshake_data(SSL *ssl, OSSL_ENCRYPTION_LEVEL ossl_level,
	const uint8_t *data, size_t len)
{
	struct doq_conn* doq_conn = (struct doq_conn*)SSL_get_app_data(ssl);
#ifdef HAVE_NGTCP2_ENCRYPTION_LEVEL
	ngtcp2_encryption_level
#else
	ngtcp2_crypto_level
#endif
		level =
#ifdef HAVE_NGTCP2_CRYPTO_QUICTLS_FROM_OSSL_ENCRYPTION_LEVEL
		ngtcp2_crypto_quictls_from_ossl_encryption_level(ossl_level);
#else
		ngtcp2_crypto_openssl_from_ossl_encryption_level(ossl_level);
#endif
	int rv;

	verbose(VERB_ALGO, "doq_add_handshake_data: "
		"ngtcp2_con_submit_crypto_data level %d", (int)level);
	rv = ngtcp2_conn_submit_crypto_data(doq_conn->conn, level, data, len);
	if(rv != 0) {
		log_err("ngtcp2_conn_submit_crypto_data failed: %s",
			ngtcp2_strerror(rv));
		ngtcp2_conn_set_tls_error(doq_conn->conn, rv);
		return 0;
	}
	return 1;
}

/** quic_method flush_flight function. Nothing to do, reports success. */
static int
doq_flush_flight(SSL* ATTR_UNUSED(ssl))
{
	return 1;
}

/** quic_method send_alert function */
static int
doq_send_alert(SSL *ssl, enum ssl_encryption_level_t ATTR_UNUSED(level),
	uint8_t alert)
{
	struct doq_conn* doq_conn = (struct doq_conn*)SSL_get_app_data(ssl);
	/* only record the alert on the connection */
	doq_conn->tls_alert = alert;
	return 1;
}
#endif /* HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT */

/** ALPN select callback for the doq SSL context.
 * Accepts only when the client offers "doq"; otherwise the handshake is
 * failed with a fatal alert. */
static int
doq_alpn_select_cb(SSL* ATTR_UNUSED(ssl), const unsigned char** out,
	unsigned char* outlen, const unsigned char* in, unsigned int inlen,
	void* ATTR_UNUSED(arg))
{
	/* select "doq" */
	int ret = SSL_select_next_proto((void*)out, outlen,
		(const unsigned char*)"\x03""doq", 4, in, inlen);
	if(ret == OPENSSL_NPN_NEGOTIATED)
		return SSL_TLSEXT_ERR_OK;
	verbose(VERB_ALGO, "doq alpn_select_cb: ALPN from client does "
		"not have 'doq'");
	return SSL_TLSEXT_ERR_ALERT_FATAL;
}

/** create new tls context (SSL_CTX) for the server doq socket.
 * Restricted to TLS 1.3, loads the service certificate chain and key from
 * the doq_socket settings, and when ssl_verify_pem is set, requires and
 * verifies client certificates. Returns NULL on failure. */
static SSL_CTX*
doq_ctx_server_setup(struct doq_server_socket* doq_socket)
{
	char* sid_ctx = "unbound server";
#ifndef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT
	SSL_QUIC_METHOD* quic_method;
#endif
	SSL_CTX* ctx = SSL_CTX_new(TLS_server_method());
	if(!ctx) {
		log_crypto_err("Could not SSL_CTX_new");
		return NULL;
	}
	SSL_CTX_set_options(ctx,
		(SSL_OP_ALL & ~SSL_OP_DONT_INSERT_EMPTY_FRAGMENTS) |
		SSL_OP_SINGLE_ECDH_USE |
		SSL_OP_CIPHER_SERVER_PREFERENCE |
		SSL_OP_NO_ANTI_REPLAY);
	SSL_CTX_set_mode(ctx, SSL_MODE_RELEASE_BUFFERS);
	SSL_CTX_set_min_proto_version(ctx, TLS1_3_VERSION);
	SSL_CTX_set_max_proto_version(ctx, TLS1_3_VERSION);
#ifdef HAVE_SSL_CTX_SET_ALPN_SELECT_CB
	SSL_CTX_set_alpn_select_cb(ctx, doq_alpn_select_cb, NULL);
#endif
	SSL_CTX_set_default_verify_paths(ctx);
	if(!SSL_CTX_use_certificate_chain_file(ctx,
		doq_socket->ssl_service_pem)) {
		log_err("doq: error for cert file: %s",
			doq_socket->ssl_service_pem);
		log_crypto_err("doq: error in "
			"SSL_CTX_use_certificate_chain_file");
		SSL_CTX_free(ctx);
		return NULL;
	}
	if(!SSL_CTX_use_PrivateKey_file(ctx, doq_socket->ssl_service_key,
		SSL_FILETYPE_PEM)) {
		log_err("doq: error for private key file: %s",
			doq_socket->ssl_service_key);
		log_crypto_err("doq: error in SSL_CTX_use_PrivateKey_file");
		SSL_CTX_free(ctx);
		return NULL;
	}
	if(!SSL_CTX_check_private_key(ctx)) {
		log_err("doq: error for key file: %s",
			doq_socket->ssl_service_key);
		log_crypto_err("doq: error in SSL_CTX_check_private_key");
		SSL_CTX_free(ctx);
		return NULL;
	}
	SSL_CTX_set_session_id_context(ctx, (void*)sid_ctx, strlen(sid_ctx));
	/* client certificate verification, only if a verify pem is set */
	if(doq_socket->ssl_verify_pem && doq_socket->ssl_verify_pem[0]) {
		if(!SSL_CTX_load_verify_locations(ctx,
			doq_socket->ssl_verify_pem, NULL)) {
			log_err("doq: error for verify pem file: %s",
				doq_socket->ssl_verify_pem);
			log_crypto_err("doq: error in "
				"SSL_CTX_load_verify_locations");
			SSL_CTX_free(ctx);
			return NULL;
		}
		SSL_CTX_set_client_CA_list(ctx, SSL_load_client_CA_file(
			doq_socket->ssl_verify_pem));
		SSL_CTX_set_verify(ctx, SSL_VERIFY_PEER|
			SSL_VERIFY_CLIENT_ONCE|
			SSL_VERIFY_FAIL_IF_NO_PEER_CERT, NULL);
	}

	SSL_CTX_set_max_early_data(ctx, 0xffffffff);
#ifdef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT
	if(ngtcp2_crypto_quictls_configure_server_context(ctx) != 0) {
		log_err("ngtcp2_crypto_quictls_configure_server_context failed");
		SSL_CTX_free(ctx);
		return NULL;
	}
#else
	/* The quic_method needs to remain valid during the SSL_CTX
	 * lifetime, so we allocate it. It is freed with the
	 * doq_server_socket. */
	quic_method = calloc(1, sizeof(SSL_QUIC_METHOD));
	if(!quic_method) {
		log_err("calloc failed: out of memory");
		SSL_CTX_free(ctx);
		return NULL;
	}
	doq_socket->quic_method = quic_method;
	quic_method->set_encryption_secrets = doq_set_encryption_secrets;
	quic_method->add_handshake_data = doq_add_handshake_data;
	quic_method->flush_flight = doq_flush_flight;
	quic_method->send_alert = doq_send_alert;
	SSL_CTX_set_quic_method(ctx, doq_socket->quic_method);
#endif
	return ctx;
}

/** Get the ngtcp2_conn from ssl userdata of type ngtcp2_conn_ref */
static ngtcp2_conn* doq_conn_ref_get_conn(ngtcp2_crypto_conn_ref* conn_ref)
{
	struct doq_conn* conn = (struct doq_conn*)conn_ref->user_data;
	return conn->conn;
}

/** create new SSL session for server connection.
 * The SSL app data is the conn's conn_ref when
 * HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT is defined, and the
 * doq_conn itself otherwise. Returns NULL on failure. */
static SSL*
doq_ssl_server_setup(SSL_CTX* ctx, struct doq_conn* conn)
{
	SSL* ssl = SSL_new(ctx);
	if(!ssl) {
		log_crypto_err("doq: SSL_new failed");
		return NULL;
	}
#ifdef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT
	conn->conn_ref.get_conn = &doq_conn_ref_get_conn;
	conn->conn_ref.user_data = conn;
	SSL_set_app_data(ssl, &conn->conn_ref);
#else
	SSL_set_app_data(ssl, conn);
#endif
	SSL_set_accept_state(ssl);
	SSL_set_quic_early_data_enabled(ssl, 1);
	return ssl;
}

/** setup the doq_socket server tls context, false on failure */
int
doq_socket_setup_ctx(struct doq_server_socket* doq_socket)
{
	doq_socket->ctx = doq_ctx_server_setup(doq_socket);
	if(!doq_socket->ctx)
		return 0;
	return 1;
}

/* Set up the ngtcp2 server connection and the SSL session for a new
 * doq_conn. Returns 1 on success, 0 on failure. */
int
doq_conn_setup(struct doq_conn* conn, uint8_t* scid, size_t scidlen,
	uint8_t* ocid, size_t ocidlen, const uint8_t* token, size_t tokenlen)
{
	int rv;
	struct ngtcp2_cid dcid, sv_scid, scid_cid;
	struct ngtcp2_path path;
	struct ngtcp2_callbacks callbacks;
	struct ngtcp2_settings settings;
	struct ngtcp2_transport_params params;
	memset(&dcid, 0, sizeof(dcid));
	memset(&sv_scid, 0, sizeof(sv_scid));
	memset(&scid_cid, 0, sizeof(scid_cid));
	memset(&path,
0, sizeof(path)); + memset(&callbacks, 0, sizeof(callbacks)); + memset(&settings, 0, sizeof(settings)); + memset(¶ms, 0, sizeof(params)); + + ngtcp2_cid_init(&scid_cid, scid, scidlen); + ngtcp2_cid_init(&dcid, conn->key.dcid, conn->key.dcidlen); + + path.remote.addr = (struct sockaddr*)&conn->key.paddr.addr; + path.remote.addrlen = conn->key.paddr.addrlen; + path.local.addr = (struct sockaddr*)&conn->key.paddr.localaddr; + path.local.addrlen = conn->key.paddr.localaddrlen; + + callbacks.recv_client_initial = ngtcp2_crypto_recv_client_initial_cb; + callbacks.recv_crypto_data = ngtcp2_crypto_recv_crypto_data_cb; + callbacks.encrypt = ngtcp2_crypto_encrypt_cb; + callbacks.decrypt = ngtcp2_crypto_decrypt_cb; + callbacks.hp_mask = ngtcp2_crypto_hp_mask; + callbacks.update_key = ngtcp2_crypto_update_key_cb; + callbacks.delete_crypto_aead_ctx = + ngtcp2_crypto_delete_crypto_aead_ctx_cb; + callbacks.delete_crypto_cipher_ctx = + ngtcp2_crypto_delete_crypto_cipher_ctx_cb; + callbacks.get_path_challenge_data = + ngtcp2_crypto_get_path_challenge_data_cb; + callbacks.version_negotiation = ngtcp2_crypto_version_negotiation_cb; + callbacks.rand = doq_rand_cb; + callbacks.get_new_connection_id = doq_get_new_connection_id_cb; + callbacks.remove_connection_id = doq_remove_connection_id_cb; + callbacks.handshake_completed = doq_handshake_completed_cb; + callbacks.stream_open = doq_stream_open_cb; + callbacks.stream_close = doq_stream_close_cb; + callbacks.stream_reset = doq_stream_reset_cb; + callbacks.acked_stream_data_offset = doq_acked_stream_data_offset_cb; + callbacks.recv_stream_data = doq_recv_stream_data_cb; + + ngtcp2_settings_default(&settings); + if(verbosity >= VERB_ALGO) { + settings.log_printf = doq_log_printf_cb; + } + settings.rand_ctx.native_handle = conn->doq_socket->rnd; + settings.initial_ts = doq_get_timestamp_nanosec(); + settings.max_stream_window = 6*1024*1024; + settings.max_window = 6*1024*1024; +#ifdef HAVE_STRUCT_NGTCP2_SETTINGS_TOKENLEN + settings.token = 
(void*)token; + settings.tokenlen = tokenlen; +#else + settings.token.base = (void*)token; + settings.token.len = tokenlen; +#endif + + ngtcp2_transport_params_default(¶ms); + params.max_idle_timeout = conn->doq_socket->idle_timeout; + params.active_connection_id_limit = 7; + params.initial_max_stream_data_bidi_local = 256*1024; + params.initial_max_stream_data_bidi_remote = 256*1024; + params.initial_max_data = 1024*1024; + /* DoQ uses bidi streams, so we allow 0 uni streams. */ + params.initial_max_streams_uni = 0; + /* Initial max on number of bidi streams the remote end can open. + * That is the number of queries it can make, at first. */ + params.initial_max_streams_bidi = 10; + if(ocid) { + ngtcp2_cid_init(¶ms.original_dcid, ocid, ocidlen); + ngtcp2_cid_init(¶ms.retry_scid, conn->key.dcid, + conn->key.dcidlen); + params.retry_scid_present = 1; + } else { + ngtcp2_cid_init(¶ms.original_dcid, conn->key.dcid, + conn->key.dcidlen); + } +#ifdef HAVE_STRUCT_NGTCP2_TRANSPORT_PARAMS_ORIGINAL_DCID_PRESENT + params.original_dcid_present = 1; +#endif + doq_fill_rand(conn->doq_socket->rnd, params.stateless_reset_token, + sizeof(params.stateless_reset_token)); + sv_scid.datalen = conn->doq_socket->sv_scidlen; + lock_rw_wrlock(&conn->table->conid_lock); + if(!doq_conn_generate_new_conid(conn, sv_scid.data, sv_scid.datalen)) { + lock_rw_unlock(&conn->table->conid_lock); + return 0; + } + + rv = ngtcp2_conn_server_new(&conn->conn, &scid_cid, &sv_scid, &path, + conn->version, &callbacks, &settings, ¶ms, NULL, conn); + if(rv != 0) { + lock_rw_unlock(&conn->table->conid_lock); + log_err("ngtcp2_conn_server_new failed: %s", + ngtcp2_strerror(rv)); + return 0; + } + if(!doq_conn_setup_conids(conn)) { + lock_rw_unlock(&conn->table->conid_lock); + log_err("doq_conn_setup_conids failed: out of memory"); + return 0; + } + lock_rw_unlock(&conn->table->conid_lock); + conn->ssl = doq_ssl_server_setup((SSL_CTX*)conn->doq_socket->ctx, + conn); + if(!conn->ssl) { + 
log_err("doq_ssl_server_setup failed"); + return 0; + } + ngtcp2_conn_set_tls_native_handle(conn->conn, conn->ssl); + doq_conn_write_enable(conn); + return 1; +} + +struct doq_conid* +doq_conid_find(struct doq_table* table, const uint8_t* data, size_t datalen) +{ + struct rbnode_type* node; + struct doq_conid key; + key.node.key = &key; + key.cid = (void*)data; + key.cidlen = datalen; + node = rbtree_search(table->conid_tree, &key); + if(node) + return (struct doq_conid*)node->key; + return NULL; +} + +/** insert conid in the conid list */ +static void +doq_conid_list_insert(struct doq_conn* conn, struct doq_conid* conid) +{ + conid->prev = NULL; + conid->next = conn->conid_list; + if(conn->conid_list) + conn->conid_list->prev = conid; + conn->conid_list = conid; +} + +/** remove conid from the conid list */ +static void +doq_conid_list_remove(struct doq_conn* conn, struct doq_conid* conid) +{ + if(conid->prev) + conid->prev->next = conid->next; + else conn->conid_list = conid->next; + if(conid->next) + conid->next->prev = conid->prev; +} + +/** create a doq_conid */ +static struct doq_conid* +doq_conid_create(uint8_t* data, size_t datalen, struct doq_conn_key* key) +{ + struct doq_conid* conid; + conid = calloc(1, sizeof(*conid)); + if(!conid) + return NULL; + conid->cid = memdup(data, datalen); + if(!conid->cid) { + free(conid); + return NULL; + } + conid->cidlen = datalen; + conid->node.key = conid; + conid->key = *key; + conid->key.dcid = memdup(key->dcid, key->dcidlen); + if(!conid->key.dcid) { + free(conid->cid); + free(conid); + return NULL; + } + return conid; +} + +void +doq_conid_delete(struct doq_conid* conid) +{ + if(!conid) + return; + free(conid->key.dcid); + free(conid->cid); + free(conid); +} + +/** return true if the conid is for the conn. 
*/ +static int +conid_is_for_conn(struct doq_conn* conn, struct doq_conid* conid) +{ + if(conid->key.dcidlen == conn->key.dcidlen && + memcmp(conid->key.dcid, conn->key.dcid, conid->key.dcidlen)==0 + && conid->key.paddr.addrlen == conn->key.paddr.addrlen && + memcmp(&conid->key.paddr.addr, &conn->key.paddr.addr, + conid->key.paddr.addrlen) == 0 && + conid->key.paddr.localaddrlen == conn->key.paddr.localaddrlen && + memcmp(&conid->key.paddr.localaddr, &conn->key.paddr.localaddr, + conid->key.paddr.localaddrlen) == 0 && + conid->key.paddr.ifindex == conn->key.paddr.ifindex) + return 1; + return 0; +} + +int +doq_conn_associate_conid(struct doq_conn* conn, uint8_t* data, size_t datalen) +{ + struct doq_conid* conid; + conid = doq_conid_find(conn->table, data, datalen); + if(conid && !conid_is_for_conn(conn, conid)) { + verbose(VERB_ALGO, "doq connection id already exists for " + "another doq_conn. Ignoring second connection id."); + /* Already exists to another conn, ignore it. + * This works, in that the conid is listed in the doq_conn + * conid_list element, and removed from there. So our conid + * tree and list are fine, when created and removed. + * The tree now does not have the lookup element pointing + * to this connection. */ + return 1; + } + if(conid) + return 1; /* already inserted */ + conid = doq_conid_create(data, datalen, &conn->key); + if(!conid) + return 0; + doq_conid_list_insert(conn, conid); + (void)rbtree_insert(conn->table->conid_tree, &conid->node); + return 1; +} + +void +doq_conn_dissociate_conid(struct doq_conn* conn, const uint8_t* data, + size_t datalen) +{ + struct doq_conid* conid; + conid = doq_conid_find(conn->table, data, datalen); + if(conid && !conid_is_for_conn(conn, conid)) + return; + if(conid) { + (void)rbtree_delete(conn->table->conid_tree, + conid->node.key); + doq_conid_list_remove(conn, conid); + doq_conid_delete(conid); + } +} + +/** associate the scid array and also the dcid. 
+ * caller must hold the locks on conn and doq_table.conid_lock. */ +static int +doq_conn_setup_id_array_and_dcid(struct doq_conn* conn, + struct ngtcp2_cid* scids, size_t num_scid) +{ + size_t i; + for(i=0; i<num_scid; i++) { /* associate every scid in the array with conn */ + if(!doq_conn_associate_conid(conn, scids[i].data, + scids[i].datalen)) + return 0; + } + if(!doq_conn_associate_conid(conn, conn->key.dcid, conn->key.dcidlen)) /* and also the dcid from the connection key */ + return 0; + return 1; +} + +int +doq_conn_setup_conids(struct doq_conn* conn) +{ + size_t num_scid = +#ifndef HAVE_NGTCP2_CONN_GET_NUM_SCID + ngtcp2_conn_get_scid(conn->conn, NULL); +#else + ngtcp2_conn_get_num_scid(conn->conn); +#endif + if(num_scid <= 4) { /* small count: use the array on the stack, no allocation */ + struct ngtcp2_cid ids[4]; + /* Usually there are not that many scids when just accepted, + * like only 2. */ + ngtcp2_conn_get_scid(conn->conn, ids); + return doq_conn_setup_id_array_and_dcid(conn, ids, num_scid); + } else { + struct ngtcp2_cid *scids = calloc(num_scid, + sizeof(struct ngtcp2_cid)); + if(!scids) + return 0; + ngtcp2_conn_get_scid(conn->conn, scids); + if(!doq_conn_setup_id_array_and_dcid(conn, scids, num_scid)) { + free(scids); + return 0; + } + free(scids); + } + return 1; +} + +void +doq_conn_clear_conids(struct doq_conn* conn) +{ + struct doq_conid* p, *next; + if(!conn) + return; + p = conn->conid_list; + while(p) { + next = p->next; /* fetch next first, p is deleted below */ + (void)rbtree_delete(conn->table->conid_tree, p->node.key); + doq_conid_delete(p); + p = next; + } + conn->conid_list = NULL; +} + +ngtcp2_tstamp doq_get_timestamp_nanosec(void) +{ +#ifdef CLOCK_REALTIME + struct timespec tp; + memset(&tp, 0, sizeof(tp)); + /* Get a nanosecond time, that can be compared with the event base. */ + if(clock_gettime(CLOCK_REALTIME, &tp) == -1) { + log_err("clock_gettime failed: %s", strerror(errno)); + } + return ((uint64_t)tp.tv_sec)*((uint64_t)1000000000) + + ((uint64_t)tp.tv_nsec); +#else + struct timeval tv; + if(gettimeofday(&tv, NULL) < 0) { + log_err("gettimeofday failed: %s", strerror(errno)); + } + return ((uint64_t)tv.tv_sec)*((uint64_t)1000000000) + + ((uint64_t)tv.tv_usec)*((uint64_t)1000); +#endif /* CLOCK_REALTIME */ +} + +/** doq start the closing period for the connection. 
*/ +static int +doq_conn_start_closing_period(struct comm_point* c, struct doq_conn* conn) +{ + struct ngtcp2_path_storage ps; + struct ngtcp2_pkt_info pi; + ngtcp2_ssize ret; + if(!conn) + return 1; + if( +#ifdef HAVE_NGTCP2_CONN_IN_CLOSING_PERIOD + ngtcp2_conn_in_closing_period(conn->conn) +#else + ngtcp2_conn_is_in_closing_period(conn->conn) +#endif + ) + return 1; + if( +#ifdef HAVE_NGTCP2_CONN_IN_DRAINING_PERIOD + ngtcp2_conn_in_draining_period(conn->conn) +#else + ngtcp2_conn_is_in_draining_period(conn->conn) +#endif + ) { + doq_conn_write_disable(conn); + return 1; + } + ngtcp2_path_storage_zero(&ps); + sldns_buffer_clear(c->doq_socket->pkt_buf); + /* the call to ngtcp2_conn_write_connection_close causes the + * conn to be closed. It is now in the closing period. */ + ret = ngtcp2_conn_write_connection_close(conn->conn, &ps.path, + &pi, sldns_buffer_begin(c->doq_socket->pkt_buf), + sldns_buffer_remaining(c->doq_socket->pkt_buf), +#ifdef HAVE_NGTCP2_CCERR_DEFAULT + &conn->ccerr +#else + &conn->last_error +#endif + , doq_get_timestamp_nanosec()); + if(ret < 0) { + log_err("doq ngtcp2_conn_write_connection_close failed: %s", + ngtcp2_strerror(ret)); + return 0; + } + if(ret == 0) { + return 0; + } + sldns_buffer_set_position(c->doq_socket->pkt_buf, ret); + sldns_buffer_flip(c->doq_socket->pkt_buf); + + /* The close packet is allocated, because it may have to be repeated. + * When incoming packets have this connection dcid. */ + conn->close_pkt = memdup(sldns_buffer_begin(c->doq_socket->pkt_buf), + sldns_buffer_limit(c->doq_socket->pkt_buf)); + if(!conn->close_pkt) { + log_err("doq: could not allocate close packet: out of memory"); + return 0; + } + conn->close_pkt_len = sldns_buffer_limit(c->doq_socket->pkt_buf); + conn->close_ecn = pi.ecn; + return 1; +} + +/** doq send the close packet for the connection, perhaps again. 
*/ +int +doq_conn_send_close(struct comm_point* c, struct doq_conn* conn) +{ + if(!conn) + return 0; + if(!conn->close_pkt) + return 0; + if(conn->close_pkt_len > sldns_buffer_capacity(c->doq_socket->pkt_buf)) + return 0; + sldns_buffer_clear(c->doq_socket->pkt_buf); + sldns_buffer_write(c->doq_socket->pkt_buf, conn->close_pkt, conn->close_pkt_len); + sldns_buffer_flip(c->doq_socket->pkt_buf); + verbose(VERB_ALGO, "doq send connection close"); + doq_send_pkt(c, &conn->key.paddr, conn->close_ecn); + doq_conn_write_disable(conn); + return 1; +} + +/** doq close the connection on error. If it returns a failure, it + * does not wait to send a close, and the connection can be dropped. */ +static int +doq_conn_close_error(struct comm_point* c, struct doq_conn* conn) +{ +#ifdef HAVE_NGTCP2_CCERR_DEFAULT + if(conn->ccerr.type == NGTCP2_CCERR_TYPE_IDLE_CLOSE) + return 0; +#else + if(conn->last_error.type == + NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_TRANSPORT_IDLE_CLOSE) + return 0; +#endif + if(!doq_conn_start_closing_period(c, conn)) + return 0; + if( +#ifdef HAVE_NGTCP2_CONN_IN_DRAINING_PERIOD + ngtcp2_conn_in_draining_period(conn->conn) +#else + ngtcp2_conn_is_in_draining_period(conn->conn) +#endif + ) { + doq_conn_write_disable(conn); + return 1; + } + doq_conn_write_enable(conn); + if(!doq_conn_send_close(c, conn)) + return 0; + return 1; +} + +int +doq_conn_recv(struct comm_point* c, struct doq_pkt_addr* paddr, + struct doq_conn* conn, struct ngtcp2_pkt_info* pi, int* err_retry, + int* err_drop) +{ + int ret; + ngtcp2_tstamp ts; + struct ngtcp2_path path; + memset(&path, 0, sizeof(path)); + path.remote.addr = (struct sockaddr*)&paddr->addr; + path.remote.addrlen = paddr->addrlen; + path.local.addr = (struct sockaddr*)&paddr->localaddr; + path.local.addrlen = paddr->localaddrlen; + ts = doq_get_timestamp_nanosec(); + + ret = ngtcp2_conn_read_pkt(conn->conn, &path, pi, + sldns_buffer_begin(c->doq_socket->pkt_buf), + sldns_buffer_limit(c->doq_socket->pkt_buf), ts); + 
if(ret != 0) { + if(err_retry) + *err_retry = 0; + if(err_drop) + *err_drop = 0; + if(ret == NGTCP2_ERR_DRAINING) { + verbose(VERB_ALGO, "ngtcp2_conn_read_pkt returned %s", + ngtcp2_strerror(ret)); + doq_conn_write_disable(conn); + return 0; + } else if(ret == NGTCP2_ERR_DROP_CONN) { + verbose(VERB_ALGO, "ngtcp2_conn_read_pkt returned %s", + ngtcp2_strerror(ret)); + if(err_drop) + *err_drop = 1; + return 0; + } else if(ret == NGTCP2_ERR_RETRY) { + verbose(VERB_ALGO, "ngtcp2_conn_read_pkt returned %s", + ngtcp2_strerror(ret)); + if(err_retry) + *err_retry = 1; + if(err_drop) + *err_drop = 1; + return 0; + } else if(ret == NGTCP2_ERR_CRYPTO) { + if( +#ifdef HAVE_NGTCP2_CCERR_DEFAULT + !conn->ccerr.error_code +#else + !conn->last_error.error_code +#endif + ) { + /* in picotls the tls alert may need to be + * copied, but this is with openssl. And there + * is conn->tls_alert. */ +#ifdef HAVE_NGTCP2_CCERR_DEFAULT + ngtcp2_ccerr_set_tls_alert(&conn->ccerr, + conn->tls_alert, NULL, 0); +#else + ngtcp2_connection_close_error_set_transport_error_tls_alert( + &conn->last_error, conn->tls_alert, + NULL, 0); +#endif + } + } else { + if( +#ifdef HAVE_NGTCP2_CCERR_DEFAULT + !conn->ccerr.error_code +#else + !conn->last_error.error_code +#endif + ) { +#ifdef HAVE_NGTCP2_CCERR_DEFAULT + ngtcp2_ccerr_set_liberr(&conn->ccerr, ret, + NULL, 0); +#else + ngtcp2_connection_close_error_set_transport_error_liberr( + &conn->last_error, ret, NULL, 0); +#endif + } + } + log_err("ngtcp2_conn_read_pkt failed: %s", + ngtcp2_strerror(ret)); + if(!doq_conn_close_error(c, conn)) { + if(err_drop) + *err_drop = 1; + } + return 0; + } + doq_conn_write_enable(conn); + return 1; +} + +/** doq stream write is done */ +static void +doq_stream_write_is_done(struct doq_conn* conn, struct doq_stream* stream) +{ + /* Cannot deallocate, the buffer may be needed for resends. 
*/ + doq_stream_off_write_list(conn, stream); +} + +int +doq_conn_write_streams(struct comm_point* c, struct doq_conn* conn, + int* err_drop) +{ + struct doq_stream* stream = conn->stream_write_first; + ngtcp2_path_storage ps; + ngtcp2_tstamp ts = doq_get_timestamp_nanosec(); + size_t num_packets = 0, max_packets = 65535; + ngtcp2_path_storage_zero(&ps); + + for(;;) { + int64_t stream_id; + uint32_t flags = 0; + ngtcp2_pkt_info pi; + ngtcp2_vec datav[2]; + size_t datav_count = 0; + ngtcp2_ssize ret, ndatalen = 0; + int fin; + + if(stream) { + /* data to send */ + verbose(VERB_ALGO, "doq: doq_conn write stream %d", + (int)stream->stream_id); + stream_id = stream->stream_id; + fin = 1; + if(stream->nwrite < 2) { + datav[0].base = ((uint8_t*)&stream-> + outlen_wire) + stream->nwrite; + datav[0].len = 2 - stream->nwrite; + datav[1].base = stream->out; + datav[1].len = stream->outlen; + datav_count = 2; + } else { + datav[0].base = stream->out + + (stream->nwrite-2); + datav[0].len = stream->outlen - + (stream->nwrite-2); + datav_count = 1; + } + } else { + /* no data to send */ + verbose(VERB_ALGO, "doq: doq_conn write stream -1"); + stream_id = -1; + fin = 0; + datav[0].base = NULL; + datav[0].len = 0; + datav_count = 1; + } + + /* if more streams, set it to write more */ + if(stream && stream->write_next) + flags |= NGTCP2_WRITE_STREAM_FLAG_MORE; + if(fin) + flags |= NGTCP2_WRITE_STREAM_FLAG_FIN; + + sldns_buffer_clear(c->doq_socket->pkt_buf); + ret = ngtcp2_conn_writev_stream(conn->conn, &ps.path, &pi, + sldns_buffer_begin(c->doq_socket->pkt_buf), + sldns_buffer_remaining(c->doq_socket->pkt_buf), + &ndatalen, flags, stream_id, datav, datav_count, ts); + if(ret < 0) { + if(ret == NGTCP2_ERR_WRITE_MORE) { + verbose(VERB_ALGO, "doq: write more, ndatalen %d", (int)ndatalen); + if(stream) { + if(ndatalen >= 0) + stream->nwrite += ndatalen; + if(stream->nwrite >= stream->outlen+2) + doq_stream_write_is_done( + conn, stream); + stream = stream->write_next; + } + continue; 
+ } else if(ret == NGTCP2_ERR_STREAM_DATA_BLOCKED) { + verbose(VERB_ALGO, "doq: ngtcp2_conn_writev_stream returned NGTCP2_ERR_STREAM_DATA_BLOCKED"); +#ifdef HAVE_NGTCP2_CCERR_DEFAULT + ngtcp2_ccerr_set_application_error( + &conn->ccerr, -1, NULL, 0); +#else + ngtcp2_connection_close_error_set_application_error(&conn->last_error, -1, NULL, 0); +#endif + if(err_drop) + *err_drop = 0; + if(!doq_conn_close_error(c, conn)) { + if(err_drop) + *err_drop = 1; + } + return 0; + } else if(ret == NGTCP2_ERR_STREAM_SHUT_WR) { + verbose(VERB_ALGO, "doq: ngtcp2_conn_writev_stream returned NGTCP2_ERR_STREAM_SHUT_WR"); +#ifdef HAVE_NGTCP2_CCERR_DEFAULT + ngtcp2_ccerr_set_application_error( + &conn->ccerr, -1, NULL, 0); +#else + ngtcp2_connection_close_error_set_application_error(&conn->last_error, -1, NULL, 0); +#endif + if(err_drop) + *err_drop = 0; + if(!doq_conn_close_error(c, conn)) { + if(err_drop) + *err_drop = 1; + } + return 0; + } + + log_err("doq: ngtcp2_conn_writev_stream failed: %s", + ngtcp2_strerror(ret)); +#ifdef HAVE_NGTCP2_CCERR_DEFAULT + ngtcp2_ccerr_set_liberr(&conn->ccerr, ret, NULL, 0); +#else + ngtcp2_connection_close_error_set_transport_error_liberr( + &conn->last_error, ret, NULL, 0); +#endif + if(err_drop) + *err_drop = 0; + if(!doq_conn_close_error(c, conn)) { + if(err_drop) + *err_drop = 1; + } + return 0; + } + verbose(VERB_ALGO, "doq: writev_stream pkt size %d ndatawritten %d", + (int)ret, (int)ndatalen); + + if(ndatalen >= 0 && stream) { + stream->nwrite += ndatalen; + if(stream->nwrite >= stream->outlen+2) + doq_stream_write_is_done(conn, stream); + } + if(ret == 0) { + /* congestion limited */ + doq_conn_write_disable(conn); + ngtcp2_conn_update_pkt_tx_time(conn->conn, ts); + return 1; + } + sldns_buffer_set_position(c->doq_socket->pkt_buf, ret); + sldns_buffer_flip(c->doq_socket->pkt_buf); + doq_send_pkt(c, &conn->key.paddr, pi.ecn); + + if(c->doq_socket->have_blocked_pkt) + break; + if(++num_packets == max_packets) + break; + if(stream) + stream = 
stream->write_next; + } + ngtcp2_conn_update_pkt_tx_time(conn->conn, ts); + return 1; +} + +void +doq_conn_write_enable(struct doq_conn* conn) +{ + conn->write_interest = 1; +} + +void +doq_conn_write_disable(struct doq_conn* conn) +{ + conn->write_interest = 0; +} + +/** doq append the connection to the write list */ +static void +doq_conn_write_list_append(struct doq_table* table, struct doq_conn* conn) +{ + if(conn->on_write_list) + return; + conn->write_prev = table->write_list_last; + if(table->write_list_last) + table->write_list_last->write_next = conn; + else table->write_list_first = conn; + conn->write_next = NULL; + table->write_list_last = conn; + conn->on_write_list = 1; +} + +void +doq_conn_write_list_remove(struct doq_table* table, struct doq_conn* conn) +{ + if(!conn->on_write_list) + return; + if(conn->write_next) + conn->write_next->write_prev = conn->write_prev; + else table->write_list_last = conn->write_prev; + if(conn->write_prev) + conn->write_prev->write_next = conn->write_next; + else table->write_list_first = conn->write_next; + conn->write_prev = NULL; + conn->write_next = NULL; + conn->on_write_list = 0; +} + +void +doq_conn_set_write_list(struct doq_table* table, struct doq_conn* conn) +{ + if(conn->write_interest && conn->on_write_list) + return; + if(!conn->write_interest && !conn->on_write_list) + return; + if(conn->write_interest) + doq_conn_write_list_append(table, conn); + else doq_conn_write_list_remove(table, conn); +} + +struct doq_conn* +doq_table_pop_first(struct doq_table* table) +{ + struct doq_conn* conn = table->write_list_first; + if(!conn) + return NULL; + lock_basic_lock(&conn->lock); + table->write_list_first = conn->write_next; + if(conn->write_next) + conn->write_next->write_prev = NULL; + else table->write_list_last = NULL; + conn->write_next = NULL; + conn->write_prev = NULL; + conn->on_write_list = 0; + return conn; +} + +int +doq_conn_check_timer(struct doq_conn* conn, struct timeval* tv) +{ + ngtcp2_tstamp 
expiry = ngtcp2_conn_get_expiry(conn->conn); + ngtcp2_tstamp now = doq_get_timestamp_nanosec(); + ngtcp2_tstamp t; + + if(expiry <= now) { + /* The timer has already expired, add with zero timeout. + * This should call the callback straight away. Calling it + * from the event callbacks is cleaner than calling it here, + * because then it is always called with the same locks and + * so on. This routine only has the conn.lock. */ + t = now; + } else { + t = expiry; + } + + /* convert to timeval */ + memset(tv, 0, sizeof(*tv)); + tv->tv_sec = t / NGTCP2_SECONDS; + tv->tv_usec = (t / NGTCP2_MICROSECONDS)%1000000; + + /* If we already have a timer, is it the right value? */ + if(conn->timer.timer_in_tree || conn->timer.timer_in_list) { + if(conn->timer.time.tv_sec == tv->tv_sec && + conn->timer.time.tv_usec == tv->tv_usec) + return 0; + } + return 1; +} + +/* doq print connection log */ +static void +doq_conn_log_line(struct doq_conn* conn, char* s) +{ + char remotestr[256], localstr[256]; + addr_to_str((void*)&conn->key.paddr.addr, conn->key.paddr.addrlen, + remotestr, sizeof(remotestr)); + addr_to_str((void*)&conn->key.paddr.localaddr, + conn->key.paddr.localaddrlen, localstr, sizeof(localstr)); + log_info("doq conn %s %s %s", remotestr, localstr, s); +} + +int +doq_conn_handle_timeout(struct doq_conn* conn) +{ + ngtcp2_tstamp now = doq_get_timestamp_nanosec(); + int rv; + + if(verbosity >= VERB_ALGO) + doq_conn_log_line(conn, "timeout"); + + rv = ngtcp2_conn_handle_expiry(conn->conn, now); + if(rv != 0) { + verbose(VERB_ALGO, "ngtcp2_conn_handle_expiry failed: %s", + ngtcp2_strerror(rv)); +#ifdef HAVE_NGTCP2_CCERR_DEFAULT + ngtcp2_ccerr_set_liberr(&conn->ccerr, rv, NULL, 0); +#else + ngtcp2_connection_close_error_set_transport_error_liberr( + &conn->last_error, rv, NULL, 0); +#endif + if(!doq_conn_close_error(conn->doq_socket->cp, conn)) { + /* failed, return for deletion */ + return 0; + } + return 1; + } + doq_conn_write_enable(conn); + 
if(!doq_conn_write_streams(conn->doq_socket->cp, conn, NULL)) { + /* failed, return for deletion. */ + return 0; + } + return 1; +} + +void +doq_table_quic_size_add(struct doq_table* table, size_t add) +{ + lock_basic_lock(&table->size_lock); + table->current_size += add; + lock_basic_unlock(&table->size_lock); +} + +void +doq_table_quic_size_subtract(struct doq_table* table, size_t subtract) +{ + lock_basic_lock(&table->size_lock); + if(table->current_size < subtract) /* clamp at zero, do not wrap around */ + table->current_size = 0; + else table->current_size -= subtract; + lock_basic_unlock(&table->size_lock); +} + +int +doq_table_quic_size_available(struct doq_table* table, + struct config_file* cfg, size_t mem) +{ + size_t cur; + lock_basic_lock(&table->size_lock); + cur = table->current_size; + lock_basic_unlock(&table->size_lock); + + if(cur + mem > cfg->quic_size) /* adding mem would go over the configured quic_size */ + return 0; + return 1; +} + +size_t doq_table_quic_size_get(struct doq_table* table) +{ + size_t sz; + if(!table) + return 0; + lock_basic_lock(&table->size_lock); + sz = table->current_size; + lock_basic_unlock(&table->size_lock); + return sz; +} +#endif /* HAVE_NGTCP2 */ diff --git a/services/listen_dnsport.h b/services/listen_dnsport.h index 84ac4b068..c29f4d72b 100644 --- a/services/listen_dnsport.h +++ b/services/listen_dnsport.h @@ -43,10 +43,16 @@ #define LISTEN_DNSPORT_H #include "util/netevent.h" +#include "util/rbtree.h" +#include "util/locks.h" #include "daemon/acl_list.h" #ifdef HAVE_NGHTTP2_NGHTTP2_H #include <nghttp2/nghttp2.h> #endif +#ifdef HAVE_NGTCP2 +#include <ngtcp2/ngtcp2.h> +#include <ngtcp2/ngtcp2_crypto.h> +#endif struct listen_list; struct config_file; struct addrinfo; @@ -100,7 +106,9 @@ enum listen_type { /** udp ipv6 (v4mapped) for use with ancillary data + dnscrypt*/ listen_type_udpancil_dnscrypt, /** HTTP(2) over TLS over TCP */ - listen_type_http + listen_type_http, + /** DNS over QUIC */ + listen_type_doq }; /* @@ -188,6 +196,11 @@ int resolve_interface_names(char** ifs, int num_ifs, * @param tcp_conn_limit: TCP connection limit info. 
* @param sslctx: nonNULL if ssl context. * @param dtenv: nonNULL if dnstap enabled. + * @param doq_table: the doq connection table, with shared information. + * @param rnd: random state. + * @param ssl_service_key: the SSL service key file. + * @param ssl_service_pem: the SSL service pem file. + * @param cfg: config file struct. * @param cb: callback function when a request arrives. It is passed * the packet and user argument. Return true to send a reply. * @param cb_arg: user data argument for callback function. @@ -198,8 +211,10 @@ listen_create(struct comm_base* base, struct listen_port* ports, size_t bufsize, int tcp_accept_count, int tcp_idle_timeout, int harden_large_queries, uint32_t http_max_streams, char* http_endpoint, int http_notls, struct tcl_list* tcp_conn_limit, - void* sslctx, struct dt_env* dtenv, comm_point_callback_type* cb, - void *cb_arg); + void* sslctx, struct dt_env* dtenv, struct doq_table* doq_table, + struct ub_randstate* rnd, const char* ssl_service_key, + const char* ssl_service_pem, struct config_file* cfg, + comm_point_callback_type* cb, void *cb_arg); /** * delete the listening structure @@ -278,11 +293,12 @@ int create_udp_sock(int family, int socktype, struct sockaddr* addr, * @param freebind: set IP_FREEBIND socket option. * @param use_systemd: if true, fetch sockets from systemd. * @param dscp: DSCP to use. + * @param additional: additional log information for the socket type. * @return: the socket. -1 on error. 
*/ int create_tcp_accept_sock(struct addrinfo *addr, int v6only, int* noproto, int* reuseport, int transparent, int mss, int nodelay, int freebind, - int use_systemd, int dscp); + int use_systemd, int dscp, const char* additional); /** * Create and bind local listening socket @@ -452,6 +468,377 @@ int http2_submit_dns_response(struct http2_session* h2_session); int http2_submit_dns_response(void* v); #endif /* HAVE_NGHTTP2 */ +#ifdef HAVE_NGTCP2 +struct doq_conid; +struct doq_server_socket; + +/** + * DoQ shared connection table. This is the connections for the host. + * And some config parameter values for connections. The host has to + * respond on that ip,port for those connections, so they are shared + * between threads. + */ +struct doq_table { + /** the lock on the tree and config elements. insert and deletion, + * also lookup in the tree needs to hold the lock. */ + lock_rw_type lock; + /** rbtree of doq_conn, the connections to different destination + * addresses, and can be found by dcid. */ + struct rbtree_type* conn_tree; + /** lock for the conid tree, needed for the conid tree and also + * the conid elements */ + lock_rw_type conid_lock; + /** rbtree of doq_conid, connections can be found by their + * connection ids. Lookup by connection id, finds doq_conn. */ + struct rbtree_type* conid_tree; + /** the server scid length */ + int sv_scidlen; + /** the static secret for the server */ + uint8_t* static_secret; + /** length of the static secret */ + size_t static_secret_len; + /** the idle timeout in nanoseconds */ + uint64_t idle_timeout; + /** the list of write interested connections, hold the doq_table.lock + * to change them */ + struct doq_conn* write_list_first, *write_list_last; + /** rbtree of doq_timer. */ + struct rbtree_type* timer_tree; + /** lock on the current_size counter. */ + lock_basic_type size_lock; + /** current use, in bytes, of QUIC buffers. + * The doq_conn ngtcp2_conn structure, SSL structure and conid structs + * are not counted. 
*/ + size_t current_size; +}; + +/** create doq table */ +struct doq_table* doq_table_create(struct config_file* cfg, + struct ub_randstate* rnd); + +/** delete doq table */ +void doq_table_delete(struct doq_table* table); + +/** + * Timer information for doq timer. + */ +struct doq_timer { + /** The rbnode in the tree sorted by timeout value. Key this struct. */ + struct rbnode_type node; + /** The timeout value. Absolute time value. */ + struct timeval time; + /** If the timer is in the time tree, with the node. */ + int timer_in_tree; + /** If there are more timers with the exact same timeout value, + * they form a set of timers. The rbnode timer has a link to the list + * with the other timers in the set. The rbnode timer is not a + * member of the list with the other timers. The other timers are not + * linked into the tree. */ + struct doq_timer* setlist_first, *setlist_last; + /** If the timer is on the setlist. */ + int timer_in_list; + /** If in the setlist, the next and prev element. */ + struct doq_timer* setlist_next, *setlist_prev; + /** The connection that is timeouted. */ + struct doq_conn* conn; + /** The worker that is waiting for the timeout event. + * Set for the rbnode tree linked element. If a worker is waiting + * for the event. If NULL, no worker is waiting for this timeout. */ + struct doq_server_socket* worker_doq_socket; +}; + +/** + * Key information that makes a doq_conn node in the tree lookup. + */ +struct doq_conn_key { + /** the remote endpoint and local endpoint and ifindex */ + struct doq_pkt_addr paddr; + /** the doq connection dcid */ + uint8_t* dcid; + /** length of dcid */ + size_t dcidlen; +}; + +/** + * DoQ connection, for DNS over QUIC. One connection to a remote endpoint + * with a number of streams in it. Every stream is like a tcp stream with + * a uint16_t length, query read, and a uint16_t length and answer written. 
+ */ +struct doq_conn { + /** rbtree node, key is addresses and dcid */ + struct rbnode_type node; + /** lock on the connection */ + lock_basic_type lock; + /** the key information, with dcid and address endpoint */ + struct doq_conn_key key; + /** the doq server socket for inside callbacks */ + struct doq_server_socket* doq_socket; + /** the doq table this connection is part of */ + struct doq_table* table; + /** if the connection is about to be deleted. */ + uint8_t is_deleted; + /** the version, the client chosen version of QUIC */ + uint32_t version; + /** the ngtcp2 connection, a server connection */ + struct ngtcp2_conn* conn; + /** the connection ids that are associated with this doq_conn. + * There can be a number, that can change. They are linked here, + * so that upon removal, the list of actually associated conid + * elements can be removed as well. */ + struct doq_conid* conid_list; + /** the ngtcp2 last error for the connection */ +#ifdef HAVE_NGTCP2_CCERR_DEFAULT + struct ngtcp2_ccerr ccerr; +#else + struct ngtcp2_connection_close_error last_error; +#endif + /** the recent tls alert error code */ + uint8_t tls_alert; + /** the ssl context, SSL* */ + void* ssl; +#ifdef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT + /** the connection reference for ngtcp2_conn and userdata in ssl */ + struct ngtcp2_crypto_conn_ref conn_ref; +#endif + /** closure packet, if any */ + uint8_t* close_pkt; + /** length of closure packet. */ + size_t close_pkt_len; + /** closure ecn */ + uint32_t close_ecn; + /** the streams for this connection, of type doq_stream */ + struct rbtree_type stream_tree; + /** the streams that want write, they have something to write. + * The list is ordered, the last have to wait for the first to + * get their data written. */ + struct doq_stream* stream_write_first, *stream_write_last; + /** the conn has write interest if true, no write interest if false. 
*/ + uint8_t write_interest; + /** if the conn is on the connection write list */ + uint8_t on_write_list; + /** the connection write list prev and next, if on the write list */ + struct doq_conn* write_prev, *write_next; + /** The timer for the connection. If unused, it is not in the tree + * and not in the list. It is alloced here, so that it is prealloced. + * It has to be set after every read and write on the connection, so + * this improves performance, but also the allocation does not fail. */ + struct doq_timer timer; +}; + +/** + * Connection ID and the doq_conn that is that connection. A connection + * has an original dcid, and then more connection ids associated. + */ +struct doq_conid { + /** rbtree node, key is the connection id. */ + struct rbnode_type node; + /** the next and prev in the list of conids for the doq_conn */ + struct doq_conid* next, *prev; + /** key to the doq_conn that is the connection */ + struct doq_conn_key key; + /** the connection id, byte string */ + uint8_t* cid; + /** the length of cid */ + size_t cidlen; +}; + +/** + * DoQ stream, for DNS over QUIC. + */ +struct doq_stream { + /** the rbtree node for the stream, key is the stream_id */ + rbnode_type node; + /** the stream id */ + int64_t stream_id; + /** if the stream is closed */ + uint8_t is_closed; + /** if the query is complete */ + uint8_t is_query_complete; + /** the number of bytes read on the stream, up to querylen+2. */ + size_t nread; + /** the length of the input query bytes */ + size_t inlen; + /** the input bytes */ + uint8_t* in; + /** does the stream have an answer to send */ + uint8_t is_answer_available; + /** the answer bytes sent, up to outlen+2. 
*/ + size_t nwrite; + /** the length of the output answer bytes */ + size_t outlen; + /** the output length in network wireformat */ + uint16_t outlen_wire; + /** the output packet bytes */ + uint8_t* out; + /** if the stream is on the write list */ + uint8_t on_write_list; + /** the prev and next on the write list, if on the list */ + struct doq_stream* write_prev, *write_next; +}; + +/** doq application error code that is sent when a stream is closed */ +#define DOQ_APP_ERROR_CODE 1 + +/** + * Create the doq connection. + * @param c: the comm point for the listening doq socket. + * @param paddr: with remote and local address and ifindex for the + * connection destination. This is where packets are sent. + * @param dcid: the dcid, Destination Connection ID. + * @param dcidlen: length of dcid. + * @param version: client chosen version. + * @return new doq connection or NULL on allocation failure. + */ +struct doq_conn* doq_conn_create(struct comm_point* c, + struct doq_pkt_addr* paddr, const uint8_t* dcid, size_t dcidlen, + uint32_t version); + +/** + * Delete the doq connection structure. + * @param conn: to delete. + * @param table: with memory size. + */ +void doq_conn_delete(struct doq_conn* conn, struct doq_table* table); + +/** compare function of doq_conn */ +int doq_conn_cmp(const void* key1, const void* key2); + +/** compare function of doq_conid */ +int doq_conid_cmp(const void* key1, const void* key2); + +/** compare function of doq_timer */ +int doq_timer_cmp(const void* key1, const void* key2); + +/** compare function of doq_stream */ +int doq_stream_cmp(const void* key1, const void* key2); + +/** setup the doq_socket server tls context */ +int doq_socket_setup_ctx(struct doq_server_socket* doq_socket); + +/** setup the doq connection callbacks, and settings. 
*/ +int doq_conn_setup(struct doq_conn* conn, uint8_t* scid, size_t scidlen, + uint8_t* ocid, size_t ocidlen, const uint8_t* token, size_t tokenlen); + +/** fill a buffer with random data */ +void doq_fill_rand(struct ub_randstate* rnd, uint8_t* buf, size_t len); + +/** delete a doq_conid */ +void doq_conid_delete(struct doq_conid* conid); + +/** add a connection id to the doq_conn. + * caller must hold doq_table.conid_lock. */ +int doq_conn_associate_conid(struct doq_conn* conn, uint8_t* data, + size_t datalen); + +/** remove a connection id from the doq_conn. + * caller must hold doq_table.conid_lock. */ +void doq_conn_dissociate_conid(struct doq_conn* conn, const uint8_t* data, + size_t datalen); + +/** initial setup to link current connection ids to the doq_conn */ +int doq_conn_setup_conids(struct doq_conn* conn); + +/** remove the connection ids from the doq_conn. + * caller must hold doq_table.conid_lock. */ +void doq_conn_clear_conids(struct doq_conn* conn); + +/** find a conid in the doq_conn connection. + * caller must hold table.conid_lock. */ +struct doq_conid* doq_conid_find(struct doq_table* doq_table, + const uint8_t* data, size_t datalen); + +/** receive a packet for a connection */ +int doq_conn_recv(struct comm_point* c, struct doq_pkt_addr* paddr, + struct doq_conn* conn, struct ngtcp2_pkt_info* pi, int* err_retry, + int* err_drop); + +/** send packets for a connection */ +int doq_conn_write_streams(struct comm_point* c, struct doq_conn* conn, + int* err_drop); + +/** send the close packet for the connection, perhaps again. */ +int doq_conn_send_close(struct comm_point* c, struct doq_conn* conn); + +/** delete doq stream */ +void doq_stream_delete(struct doq_stream* stream); + +/** doq read a connection key from repinfo. It is not malloced, but points + * into the repinfo for the dcid. 
*/ +void doq_conn_key_from_repinfo(struct doq_conn_key* key, + struct comm_reply* repinfo); + +/** doq find a stream in the connection */ +struct doq_stream* doq_stream_find(struct doq_conn* conn, int64_t stream_id); + +/** doq shutdown the stream. */ +int doq_stream_close(struct doq_conn* conn, struct doq_stream* stream, + int send_shutdown); + +/** send reply for a connection */ +int doq_stream_send_reply(struct doq_conn* conn, struct doq_stream* stream, + struct sldns_buffer* buf); + +/** the connection has write interest, wants to write packets */ +void doq_conn_write_enable(struct doq_conn* conn); + +/** the connection has no write interest, does not want to write packets */ +void doq_conn_write_disable(struct doq_conn* conn); + +/** set the connection on or off the write list, depending on write interest */ +void doq_conn_set_write_list(struct doq_table* table, struct doq_conn* conn); + +/** doq remove the connection from the write list */ +void doq_conn_write_list_remove(struct doq_table* table, + struct doq_conn* conn); + +/** doq get the first conn from the write list, if any, popped from list. + * Locks the conn that is returned. */ +struct doq_conn* doq_table_pop_first(struct doq_table* table); + +/** + * doq check if the timer for the conn needs to be changed. + * @param conn: connection, caller must hold lock on it. + * @param tv: time value, absolute time, returned. + * @return true if timer needs to be set to tv, false if no change is needed + * to the timer. The timer is already set to the right time in that case. + */ +int doq_conn_check_timer(struct doq_conn* conn, struct timeval* tv); + +/** doq remove timer from tree */ +void doq_timer_tree_remove(struct doq_table* table, struct doq_timer* timer); + +/** doq remove timer from list */ +void doq_timer_list_remove(struct doq_table* table, struct doq_timer* timer); + +/** doq unset the timer if it was set. 
*/ +void doq_timer_unset(struct doq_table* table, struct doq_timer* timer); + +/** doq set the timer and add it. */ +void doq_timer_set(struct doq_table* table, struct doq_timer* timer, + struct doq_server_socket* worker_doq_socket, struct timeval* tv); + +/** doq find a timeout in the timer tree */ +struct doq_timer* doq_timer_find_time(struct doq_table* table, + struct timeval* tv); + +/** doq handle timeout for a connection. Pass conn locked. Returns false for + * deletion. */ +int doq_conn_handle_timeout(struct doq_conn* conn); + +/** doq add size to the current quic buffer counter */ +void doq_table_quic_size_add(struct doq_table* table, size_t add); + +/** doq subtract size from the current quic buffer counter */ +void doq_table_quic_size_subtract(struct doq_table* table, size_t subtract); + +/** doq check if mem is available for quic. */ +int doq_table_quic_size_available(struct doq_table* table, + struct config_file* cfg, size_t mem); + +/** doq get the quic size value */ +size_t doq_table_quic_size_get(struct doq_table* table); +#endif /* HAVE_NGTCP2 */ + char* set_ip_dscp(int socket, int addrfamily, int ds); /** for debug and profiling purposes only @@ -459,4 +846,14 @@ char* set_ip_dscp(int socket, int addrfamily, int ds); */ void verbose_print_unbound_socket(struct unbound_socket* ub_sock); +/** event callback for testcode/doqclient */ +void doq_client_event_cb(int fd, short event, void* arg); + +/** timer event callback for testcode/doqclient */ +void doq_client_timer_cb(int fd, short event, void* arg); + +#ifdef HAVE_NGTCP2 +/** get a timestamp in nanoseconds */ +ngtcp2_tstamp doq_get_timestamp_nanosec(void); +#endif #endif /* LISTEN_DNSPORT_H */ diff --git a/smallapp/unbound-control.c b/smallapp/unbound-control.c index 21e7eb82d..b8479e9ab 100644 --- a/smallapp/unbound-control.c +++ b/smallapp/unbound-control.c @@ -293,6 +293,9 @@ static void print_mem(struct ub_shm_stat_info* shm_stat, PR_LL("mem.streamwait", s->svr.mem_stream_wait); 
PR_LL("mem.http.query_buffer", s->svr.mem_http2_query_buffer); PR_LL("mem.http.response_buffer", s->svr.mem_http2_response_buffer); +#ifdef HAVE_NGTCP2 + PR_LL("mem.quic", s->svr.mem_quic); +#endif } /** print histogram */ @@ -359,6 +362,9 @@ static void print_extended(struct ub_stats_info* s, int inhibit_zero) PR_UL("num.query.tls_resume", s->svr.qtls_resume); PR_UL("num.query.ipv6", s->svr.qipv6); PR_UL("num.query.https", s->svr.qhttps); +#ifdef HAVE_NGTCP2 + PR_UL("num.query.quic", s->svr.qquic); +#endif /* flags */ PR_UL("num.query.flags.QR", s->svr.qbit_QR); diff --git a/smallapp/worker_cb.c b/smallapp/worker_cb.c index c68981735..1d71a0945 100644 --- a/smallapp/worker_cb.c +++ b/smallapp/worker_cb.c @@ -255,3 +255,19 @@ void dtio_mainfdcallback(int ATTR_UNUSED(fd), short ATTR_UNUSED(ev), log_assert(0); } #endif + +#ifdef HAVE_NGTCP2 +void doq_client_event_cb(int ATTR_UNUSED(fd), short ATTR_UNUSED(ev), + void* ATTR_UNUSED(arg)) +{ + log_assert(0); +} +#endif + +#ifdef HAVE_NGTCP2 +void doq_client_timer_cb(int ATTR_UNUSED(fd), short ATTR_UNUSED(ev), + void* ATTR_UNUSED(arg)) +{ + log_assert(0); +} +#endif diff --git a/testcode/checklocks.c b/testcode/checklocks.c index d1c877467..fdc1b8af1 100644 --- a/testcode/checklocks.c +++ b/testcode/checklocks.c @@ -68,6 +68,8 @@ static struct thr_check* thread_infos[THRDEBUG_MAX_THREADS]; int check_locking_order = 1; /** the pid of this runset, reasonably unique. */ static pid_t check_lock_pid; +/** the name of the output file */ +static const char* output_name = "ublocktrace"; /** * Should checklocks print a trace of the lock and unlock calls. 
* It uses fprintf for that because the log function uses a lock and that @@ -142,7 +144,8 @@ acquire_locklock(struct checked_lock* lock, /** add protected region */ void -lock_protect(void *p, void* area, size_t size) +lock_protect_place(void* p, void* area, size_t size, const char* def_func, + const char* def_file, int def_line, const char* def_area) { struct checked_lock* lock = *(struct checked_lock**)p; struct protected_area* e = (struct protected_area*)malloc( @@ -151,6 +154,10 @@ lock_protect(void *p, void* area, size_t size) fatal_exit("lock_protect: out of memory"); e->region = area; e->size = size; + e->def_func = def_func; + e->def_file = def_file; + e->def_line = def_line; + e->def_area = def_area; e->hold = malloc(size); if(!e->hold) fatal_exit("lock_protect: out of memory"); @@ -203,6 +210,9 @@ prot_check(struct checked_lock* lock, if(memcmp(p->hold, p->region, p->size) != 0) { log_hex("memory prev", p->hold, p->size); log_hex("memory here", p->region, p->size); + log_err("lock_protect on %s %s:%d %s failed", + p->def_func, p->def_file, p->def_line, + p->def_area); lock_error(lock, func, file, line, "protected area modified"); } @@ -675,13 +685,19 @@ checklock_unlock(enum check_lock_type type, struct checked_lock* lock, } } +void +checklock_set_output_name(const char* name) +{ + output_name = name; +} + /** open order info debug file, thr->num must be valid */ static void open_lockorder(struct thr_check* thr) { - char buf[24]; + char buf[256]; /* room for a caller-chosen output_name plus ".<num>" */ time_t t; - snprintf(buf, sizeof(buf), "ublocktrace.%d", thr->num); + snprintf(buf, sizeof(buf), "%s.%d", output_name, thr->num); thr->order_info = fopen(buf, "w"); if(!thr->order_info) fatal_exit("could not open %s: %s", buf, strerror(errno)); diff --git a/testcode/checklocks.h b/testcode/checklocks.h index 61cc6fb0c..7ebc2f984 100644 --- a/testcode/checklocks.h +++ b/testcode/checklocks.h @@ -90,6 +90,14 @@ struct protected_area { void* hold; /** next protected area in list */ struct protected_area* next; + /** the place where
the lock_protect is made, at init. */ + const char* def_func; + /** the file where the lock_protect is made */ + const char* def_file; + /** the line number where the lock_protect is made */ + int def_line; + /** the text string for the area that is protected, at init call. */ + const char* def_area; }; /** @@ -181,12 +189,19 @@ struct checked_lock { * It demangles the lock itself (struct checked_lock**). * @param area: ptr to mem. * @param size: length of area. + * @param def_func: function where the lock_protect() line is. + * @param def_file: file where the lock_protect() line is. + * @param def_line: line where the lock_protect() line is. + * @param def_area: area string * You can call it multiple times with the same lock to give several areas. * Call it when you are done initializing the area, since it will be copied * at this time and protected right away against unauthorised changes until * the next lock() call is done. */ -void lock_protect(void* lock, void* area, size_t size); +void lock_protect_place(void* lock, void* area, size_t size, + const char* def_func, const char* def_file, int def_line, + const char* def_area); +#define lock_protect(lock, area, size) lock_protect_place(lock, area, size, __func__, __FILE__, __LINE__, #area) /** * Remove protected area from lock. @@ -203,6 +218,13 @@ void lock_unprotect(void* lock, void* area); */ size_t lock_get_mem(void* lock); +/** + * Set the output name, prefix, of the lock check output file(s). + * Call it before the checklock_start or thread creation. Pass a fixed string. + * @param name: string to use for output data file names. + */ +void checklock_set_output_name(const char* name); + /** * Initialise checklock. Sets up internal debug structures. */ diff --git a/testcode/doqclient.c b/testcode/doqclient.c new file mode 100644 index 000000000..4ba4f8c40 --- /dev/null +++ b/testcode/doqclient.c @@ -0,0 +1,2685 @@ +/* + * testcode/doqclient.c - debug program. Perform multiple DNS queries using DoQ. 
+ * + * Copyright (c) 2022, NLnet Labs. All rights reserved. + * + * This software is open source. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * Neither the name of the NLNET LABS nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * + * Simple DNS-over-QUIC client. For testing and debugging purposes. + * No authentication of TLS cert. 
+ */ + +#include "config.h" +#ifdef HAVE_GETOPT_H +#include +#endif + +#ifdef HAVE_NGTCP2 +#include +#include +#ifdef HAVE_NGTCP2_NGTCP2_CRYPTO_QUICTLS_H +#include +#else +#include +#endif +#include +#include +#ifdef HAVE_TIME_H +#include +#endif +#include +#include "util/locks.h" +#include "util/net_help.h" +#include "sldns/sbuffer.h" +#include "sldns/str2wire.h" +#include "sldns/wire2str.h" +#include "util/data/msgreply.h" +#include "util/data/msgencode.h" +#include "util/data/msgparse.h" +#include "util/data/dname.h" +#include "util/random.h" +#include "util/ub_event.h" +struct doq_client_stream_list; +struct doq_client_stream; + +/** the local client data for the DoQ connection */ +struct doq_client_data { + /** file descriptor */ + int fd; + /** the event base for the events */ + struct ub_event_base* base; + /** the ub event */ + struct ub_event* ev; + /** the expiry timer */ + struct ub_event* expire_timer; + /** is the expire_timer added */ + int expire_timer_added; + /** the ngtcp2 connection information */ + struct ngtcp2_conn* conn; + /** random state */ + struct ub_randstate* rnd; + /** server connected to as a string */ + const char* svr; + /** the static secret */ + uint8_t* static_secret_data; + /** the static secret size */ + size_t static_secret_size; + /** destination address sockaddr */ + struct sockaddr_storage dest_addr; + /** length of dest addr */ + socklen_t dest_addr_len; + /** local address sockaddr */ + struct sockaddr_storage local_addr; + /** length of local addr */ + socklen_t local_addr_len; + /** SSL context */ + SSL_CTX* ctx; + /** SSL object */ + SSL* ssl; +#ifdef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_CLIENT_CONTEXT + /** the connection reference for ngtcp2_conn and userdata in ssl */ + struct ngtcp2_crypto_conn_ref conn_ref; +#endif + /** the quic version to use */ + uint32_t quic_version; + /** the last error */ +#ifdef HAVE_NGTCP2_CCERR_DEFAULT + struct ngtcp2_ccerr ccerr; +#else + struct ngtcp2_connection_close_error last_error; 
+#endif + /** the recent tls alert error code */ + uint8_t tls_alert; + /** the buffer for packet operations */ + struct sldns_buffer* pkt_buf; + /** The list of queries to start. They have no stream associated. + * Once they do, they move to the send list. */ + struct doq_client_stream_list* query_list_start; + /** The list of queries to send. They have a stream, and they are + * sending data. Data could also be received, like errors. */ + struct doq_client_stream_list* query_list_send; + /** The list of queries to receive. They have a stream, and the + * send is done, it is possible to read data. */ + struct doq_client_stream_list* query_list_receive; + /** The list of queries that are stopped. They have no stream + * active any more. Write and read are done. The query is done, + * and it may be in error and then have no answer or partial answer. */ + struct doq_client_stream_list* query_list_stop; + /** is there a blocked packet in the blocked_pkt buffer */ + int have_blocked_pkt; + /** store blocked packet, a packet that could not be sent on the + * nonblocking socket. 
*/ + struct sldns_buffer* blocked_pkt; + /** ecn info for the blocked packet */ + struct ngtcp2_pkt_info blocked_pkt_pi; + /** the congestion control algorithm */ + ngtcp2_cc_algo cc_algo; + /** the transport parameters file, for early data transmission */ + const char* transport_file; + /** the tls session file, for session resumption */ + const char* session_file; + /** if early data is enabled for the connection */ + int early_data_enabled; + /** how quiet is the output */ + int quiet; + /** the configured port for the destination */ + int port; +}; + +/** the local client stream list, for appending streams to */ +struct doq_client_stream_list { + /** first and last members of the list */ + struct doq_client_stream* first, *last; +}; + +/** the local client data for a DoQ stream */ +struct doq_client_stream { + /** next stream in list, and prev in list */ + struct doq_client_stream* next, *prev; + /** the data buffer */ + uint8_t* data; + /** length of the data buffer */ + size_t data_len; + /** if the client query has a stream, that is active, associated with + * it. The stream_id is in stream_id. */ + int has_stream; + /** the stream id */ + int64_t stream_id; + /** data written position */ + size_t nwrite; + /** the data length for write, in network format */ + uint16_t data_tcplen; + /** if the write of the query data is done. That means the + * write channel has FIN, is closed for writing. */ + int write_is_done; + /** data read position */ + size_t nread; + /** the answer length, in network byte order */ + uint16_t answer_len; + /** the answer buffer */ + struct sldns_buffer* answer; + /** the answer is complete */ + int answer_is_complete; + /** the query has an error, it has no answer, or no complete answer */ + int query_has_error; + /** if the query is done */ + int query_is_done; +}; + +#ifndef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_CLIENT_CONTEXT +/** the quic method struct, must remain valid during the QUIC connection. 
*/ +static SSL_QUIC_METHOD quic_method; +#endif + +/** Get the connection ngtcp2_conn from the ssl app data + * ngtcp2_crypto_conn_ref */ +static ngtcp2_conn* conn_ref_get_conn(ngtcp2_crypto_conn_ref* conn_ref) +{ + struct doq_client_data* data = (struct doq_client_data*) + conn_ref->user_data; + return data->conn; +} + +static void +set_app_data(SSL* ssl, struct doq_client_data* data) +{ +#ifdef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_CLIENT_CONTEXT + data->conn_ref.get_conn = &conn_ref_get_conn; + data->conn_ref.user_data = data; + SSL_set_app_data(ssl, &data->conn_ref); +#else + SSL_set_app_data(ssl, data); +#endif +} + +static struct doq_client_data* +get_app_data(SSL* ssl) +{ + struct doq_client_data* data; +#ifdef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_CLIENT_CONTEXT + data = (struct doq_client_data*)((struct ngtcp2_crypto_conn_ref*) + SSL_get_app_data(ssl))->user_data; +#else + data = (struct doq_client_data*) SSL_get_app_data(ssl); +#endif + return data; +} + + + +/** write handle routine */ +static void on_write(struct doq_client_data* data); +/** update the timer */ +static void update_timer(struct doq_client_data* data); +/** disconnect we are done */ +static void disconnect(struct doq_client_data* data); +/** fetch and write the transport file */ +static void early_data_write_transport(struct doq_client_data* data); + +/** usage of doqclient */ +static void usage(char* argv[]) +{ + printf("usage: %s [options] name type class ...\n", argv[0]); + printf(" sends the name-type-class queries over " + "DNS-over-QUIC.\n"); + printf("-s server IP address to send the queries to, " + "default: 127.0.0.1\n"); + printf("-p Port to connect to, default: %d\n", + UNBOUND_DNS_OVER_QUIC_PORT); + printf("-v verbose output\n"); + printf("-q quiet, short output of answer\n"); + printf("-x file transport file, for read/write of transport parameters.\n\t\tIf it exists, it is used to send early data. 
It is then\n\t\twritten to contain the last used transport parameters.\n\t\tAlso -y must be enabled for early data to succeed.\n"); + printf("-y file session file, for read/write of TLS session. If it exists,\n\t\tit is used for TLS session resumption. It is then written\n\t\tto contain the last session used.\n\t\tOn its own, without also -x, resumes TLS session.\n"); + printf("-h This help text\n"); + exit(1); +} + +/** get the dest address */ +static void +get_dest_addr(struct doq_client_data* data, const char* svr, int port) +{ + if(!ipstrtoaddr(svr, port, &data->dest_addr, &data->dest_addr_len)) { + printf("fatal: bad server specs '%s'\n", svr); + exit(1); + } +} + +/** open UDP socket to svr: connect it to dest_addr and set it nonblocking. Returns the fd, exits on failure. */ +static int +open_svr_udp(struct doq_client_data* data) +{ + int fd = -1; + int r; + fd = socket(addr_is_ip6(&data->dest_addr, data->dest_addr_len)? + PF_INET6:PF_INET, SOCK_DGRAM, 0); + if(fd == -1) { + perror("socket() error"); + exit(1); + } + r = connect(fd, (struct sockaddr*)&data->dest_addr, + data->dest_addr_len); + if(r < 0 && errno != EINPROGRESS) { /* connect() returns -1 on failure, the reason is in errno */ + perror("connect() error"); + exit(1); + } + fd_set_nonblock(fd); + return fd; +} + +/** get the local address of the connection */ +static void +get_local_addr(struct doq_client_data* data) +{ + memset(&data->local_addr, 0, sizeof(data->local_addr)); + data->local_addr_len = (socklen_t)sizeof(data->local_addr); + if(getsockname(data->fd, (struct sockaddr*)&data->local_addr, + &data->local_addr_len) == -1) { + perror("getsockname() error"); + exit(1); + } + log_addr(1, "local_addr", &data->local_addr, data->local_addr_len); + log_addr(1, "dest_addr", &data->dest_addr, data->dest_addr_len); +} + +static sldns_buffer* +make_query(char* qname, char* qtype, char* qclass) +{ + struct query_info qinfo; + struct edns_data edns; + sldns_buffer* buf = sldns_buffer_new(65553); + if(!buf) fatal_exit("out of memory"); + qinfo.qname = sldns_str2wire_dname(qname, &qinfo.qname_len); + if(!qinfo.qname) { + printf("cannot parse query
name: '%s'\n", qname); + exit(1); + } + + qinfo.qtype = sldns_get_rr_type_by_name(qtype); + qinfo.qclass = sldns_get_rr_class_by_name(qclass); + qinfo.local_alias = NULL; + + qinfo_query_encode(buf, &qinfo); /* flips buffer */ + free(qinfo.qname); + sldns_buffer_write_u16_at(buf, 0, 0x0000); + sldns_buffer_write_u16_at(buf, 2, BIT_RD); + memset(&edns, 0, sizeof(edns)); + edns.edns_present = 1; + edns.bits = EDNS_DO; + edns.udp_size = 4096; + if(sldns_buffer_capacity(buf) >= + sldns_buffer_limit(buf)+calc_edns_field_size(&edns)) + attach_edns_record(buf, &edns); + return buf; +} + +/** create client stream structure */ +static struct doq_client_stream* +client_stream_create(struct sldns_buffer* query_data) +{ + struct doq_client_stream* str = calloc(1, sizeof(*str)); + if(!str) + fatal_exit("calloc failed: out of memory"); + str->data = memdup(sldns_buffer_begin(query_data), + sldns_buffer_limit(query_data)); + if(!str->data) + fatal_exit("alloc data failed: out of memory"); + str->data_len = sldns_buffer_limit(query_data); + str->stream_id = -1; + return str; +} + +/** free client stream structure */ +static void +client_stream_free(struct doq_client_stream* str) +{ + if(!str) + return; + free(str->data); + sldns_buffer_free(str->answer); + free(str); +} + +/** setup the stream to start the write process */ +static void +client_stream_start_setup(struct doq_client_stream* str, int64_t stream_id) +{ + str->has_stream = 1; + str->stream_id = stream_id; + str->nwrite = 0; + str->nread = 0; + str->answer_len = 0; + str->query_is_done = 0; + str->answer_is_complete = 0; + str->query_has_error = 0; + if(str->answer) { + sldns_buffer_free(str->answer); + str->answer = NULL; + } +} + +/** Return string for log purposes with query name. 
*/ +static char* +client_stream_string(struct doq_client_stream* str) +{ + char* s; + size_t dname_len; + char dname[256], tpstr[32], result[256+32+16]; + uint16_t tp; + if(str->data_len <= LDNS_HEADER_SIZE) { + s = strdup("query_with_no_question"); + if(!s) + fatal_exit("strdup failed: out of memory"); + return s; + } + dname_len = dname_valid(str->data+LDNS_HEADER_SIZE, + str->data_len-LDNS_HEADER_SIZE); + if(!dname_len) { + s = strdup("query_dname_not_valid"); + if(!s) + fatal_exit("strdup failed: out of memory"); + return s; + } + (void)sldns_wire2str_dname_buf(str->data+LDNS_HEADER_SIZE, dname_len, + dname, sizeof(dname)); + tp = sldns_wirerr_get_type(str->data+LDNS_HEADER_SIZE, + str->data_len-LDNS_HEADER_SIZE, dname_len); + (void)sldns_wire2str_type_buf(tp, tpstr, sizeof(tpstr)); + snprintf(result, sizeof(result), "%s %s", dname, tpstr); + s = strdup(result); + if(!s) + fatal_exit("strdup failed: out of memory"); + return s; +} + +/** create query stream list */ +static struct doq_client_stream_list* +stream_list_create(void) +{ + struct doq_client_stream_list* list = calloc(1, sizeof(*list)); + if(!list) + fatal_exit("calloc failed: out of memory"); + return list; +} + +/** free the query stream list */ +static void +stream_list_free(struct doq_client_stream_list* list) +{ + struct doq_client_stream* str; + if(!list) + return; + str = list->first; + while(str) { + struct doq_client_stream* next = str->next; + client_stream_free(str); + str = next; + } + free(list); +} + +/** append item to list */ +static void +stream_list_append(struct doq_client_stream_list* list, + struct doq_client_stream* str) +{ + if(list->last) { + str->prev = list->last; + list->last->next = str; + } else { + str->prev = NULL; + list->first = str; + } + str->next = NULL; + list->last = str; +} + +/** delete the item from the list */ +static void +stream_list_delete(struct doq_client_stream_list* list, + struct doq_client_stream* str) +{ + if(str->next) { + str->next->prev = 
str->prev; + } else { + list->last = str->prev; + } + if(str->prev) { + str->prev->next = str->next; + } else { + list->first = str->next; + } + str->prev = NULL; + str->next = NULL; +} + +/** move the item from list1 to list2 */ +static void +stream_list_move(struct doq_client_stream* str, + struct doq_client_stream_list* list1, + struct doq_client_stream_list* list2) +{ + stream_list_delete(list1, str); + stream_list_append(list2, str); +} + +/** allocate stream data buffer, then answer length is complete */ +static void +client_stream_datalen_complete(struct doq_client_stream* str) +{ + verbose(1, "answer length %d", (int)ntohs(str->answer_len)); + str->answer = sldns_buffer_new(ntohs(str->answer_len)); + if(!str->answer) + fatal_exit("sldns_buffer_new failed: out of memory"); + sldns_buffer_set_limit(str->answer, ntohs(str->answer_len)); +} + +/** print the answer rrs */ +static void +print_answer_rrs(uint8_t* pkt, size_t pktlen) +{ + char buf[65535]; + char* str; + size_t str_len; + int i, qdcount, ancount; + uint8_t* data = pkt; + size_t data_len = pktlen; + int comprloop = 0; + if(data_len < LDNS_HEADER_SIZE) + return; + qdcount = LDNS_QDCOUNT(data); + ancount = LDNS_ANCOUNT(data); + data += LDNS_HEADER_SIZE; + data_len -= LDNS_HEADER_SIZE; + + for(i=0; iquery_has_error) { + char* logs = client_stream_string(str); + printf("%s has error, there is no answer\n", logs); + free(logs); + return; + } + if(sldns_buffer_limit(str->answer) < LDNS_HEADER_SIZE) { + char* logs = client_stream_string(str); + printf("%s received short packet, smaller than header\n", + logs); + free(logs); + return; + } + rcode = LDNS_RCODE_WIRE(sldns_buffer_begin(str->answer)); + if(rcode != 0) { + char* logs = client_stream_string(str); + char rc[16]; + (void)sldns_wire2str_rcode_buf(rcode, rc, sizeof(rc)); + printf("%s rcode %s\n", logs, rc); + free(logs); + return; + } + ancount = LDNS_ANCOUNT(sldns_buffer_begin(str->answer)); + if(ancount == 0) { + char* logs = 
client_stream_string(str); + printf("%s nodata answer\n", logs); + free(logs); + return; + } + print_answer_rrs(sldns_buffer_begin(str->answer), + sldns_buffer_limit(str->answer)); +} + +/** print the stream output answer */ +static void +client_stream_print_long(struct doq_client_data* data, + struct doq_client_stream* str) +{ + char* s; + if(str->query_has_error) { + char* logs = client_stream_string(str); + printf("%s has error, there is no answer\n", logs); + free(logs); + return; + } + s = sldns_wire2str_pkt(sldns_buffer_begin(str->answer), + sldns_buffer_limit(str->answer)); + printf("%s", (s?s:";sldns_wire2str_pkt failed\n")); + printf(";; SERVER: %s %d\n", data->svr, data->port); + free(s); +} + +/** the stream has completed the data */ +static void +client_stream_data_complete(struct doq_client_stream* str) +{ + verbose(1, "received all answer content"); + if(verbosity > 0) { + char* logs = client_stream_string(str); + char* s; + log_buf(1, "received answer", str->answer); + s = sldns_wire2str_pkt(sldns_buffer_begin(str->answer), + sldns_buffer_limit(str->answer)); + if(!s) verbose(1, "could not sldns_wire2str_pkt"); + else verbose(1, "query %s received:\n%s", logs, s); + free(s); + free(logs); + } + str->answer_is_complete = 1; +} + +/** the stream has completed but with an error */ +static void +client_stream_answer_error(struct doq_client_stream* str) +{ + if(verbosity > 0) { + char* logs = client_stream_string(str); + if(str->answer) + verbose(1, "query %s has an error. received %d/%d bytes.", + logs, (int)sldns_buffer_position(str->answer), + (int)sldns_buffer_limit(str->answer)); + else + verbose(1, "query %s has an error. 
received no data.", + logs); + free(logs); + } + str->query_has_error = 1; +} + +/** receive data for a stream */ +static void +client_stream_recv_data(struct doq_client_stream* str, const uint8_t* data, + size_t datalen) +{ + int got_data = 0; + /* read the tcplength uint16_t at the start of the DNS message */ + if(str->nread < 2) { + size_t to_move = datalen; + if(datalen > 2-str->nread) + to_move = 2-str->nread; + memmove(((uint8_t*)&str->answer_len)+str->nread, data, + to_move); + str->nread += to_move; + data += to_move; + datalen -= to_move; + if(str->nread == 2) { + /* we can allocate the data buffer */ + client_stream_datalen_complete(str); + } + } + /* if we have data bytes */ + if(datalen > 0) { + size_t to_write = datalen; + if(datalen > sldns_buffer_remaining(str->answer)) + to_write = sldns_buffer_remaining(str->answer); + if(to_write > 0) { + sldns_buffer_write(str->answer, data, to_write); + str->nread += to_write; + data += to_write; + datalen -= to_write; + got_data = 1; + } + } + /* extra received bytes after end? */ + if(datalen > 0) { + verbose(1, "extra bytes after end of DNS length"); + if(verbosity > 0) + log_hex("extradata", (void*)data, datalen); + } + /* are we done with it? */ + if(got_data && str->nread >= (size_t)(ntohs(str->answer_len))+2) { + client_stream_data_complete(str); + } +} + +/** receive FIN from remote end on client stream, no more data to be + * received on the stream. 
*/ +static void +client_stream_recv_fin(struct doq_client_data* data, + struct doq_client_stream* str, int is_fin) +{ + if(verbosity > 0) { + char* logs = client_stream_string(str); + if(is_fin) + verbose(1, "query %s: received FIN from remote", logs); + else + verbose(1, "query %s: stream reset from remote", logs); + free(logs); + } + if(str->write_is_done) + stream_list_move(str, data->query_list_receive, + data->query_list_stop); + else + stream_list_move(str, data->query_list_send, + data->query_list_stop); + if(!str->answer_is_complete) { + client_stream_answer_error(str); + } + str->query_is_done = 1; + if(data->quiet) + client_stream_print_short(str); + else client_stream_print_long(data, str); + if(data->query_list_send->first==NULL && + data->query_list_receive->first==NULL) + disconnect(data); +} + +/** fill a buffer with random data */ +static void fill_rand(struct ub_randstate* rnd, uint8_t* buf, size_t len) +{ + if(RAND_bytes(buf, len) != 1) { + size_t i; + for(i=0; istatic_secret_data = malloc(len); + if(!data->static_secret_data) + fatal_exit("malloc failed: out of memory"); + data->static_secret_size = len; + fill_rand(data->rnd, data->static_secret_data, len); +} + +/** fill cid structure with random data */ +static void cid_randfill(struct ngtcp2_cid* cid, size_t datalen, + struct ub_randstate* rnd) +{ + uint8_t buf[32]; + if(datalen > sizeof(buf)) + datalen = sizeof(buf); + fill_rand(rnd, buf, datalen); + ngtcp2_cid_init(cid, buf, datalen); +} + +/** send buf on the client stream */ +static int +client_bidi_stream(struct doq_client_data* data, int64_t* ret_stream_id, + void* stream_user_data) +{ + int64_t stream_id; + int rv; + + /* open new bidirectional stream */ + rv = ngtcp2_conn_open_bidi_stream(data->conn, &stream_id, + stream_user_data); + if(rv != 0) { + if(rv == NGTCP2_ERR_STREAM_ID_BLOCKED) { + /* no bidi stream count for this new stream */ + return 0; + } + fatal_exit("could not ngtcp2_conn_open_bidi_stream: %s", + 
ngtcp2_strerror(rv)); + } + *ret_stream_id = stream_id; + return 1; +} + +/** See if we can start query streams, by creating bidirectional streams + * on the QUIC transport for them. */ +static void +query_streams_start(struct doq_client_data* data) +{ + while(data->query_list_start->first) { + struct doq_client_stream* str = data->query_list_start->first; + int64_t stream_id = 0; + if(!client_bidi_stream(data, &stream_id, str)) { + /* no more bidi streams allowed */ + break; + } + if(verbosity > 0) { + char* logs = client_stream_string(str); + verbose(1, "query %s start on bidi stream id %lld", + logs, (long long int)stream_id); + free(logs); + } + /* setup the stream to start */ + client_stream_start_setup(str, stream_id); + /* move the query entry to the send list to write it */ + stream_list_move(str, data->query_list_start, + data->query_list_send); + } +} + +/** the rand callback routine from ngtcp2 */ +static void rand_cb(uint8_t* dest, size_t destlen, + const ngtcp2_rand_ctx* rand_ctx) +{ + struct ub_randstate* rnd = (struct ub_randstate*) + rand_ctx->native_handle; + fill_rand(rnd, dest, destlen); +} + +/** the get_new_connection_id callback routine from ngtcp2 */ +static int get_new_connection_id_cb(struct ngtcp2_conn* ATTR_UNUSED(conn), + struct ngtcp2_cid* cid, uint8_t* token, size_t cidlen, void* user_data) +{ + struct doq_client_data* data = (struct doq_client_data*)user_data; + cid_randfill(cid, cidlen, data->rnd); + if(ngtcp2_crypto_generate_stateless_reset_token(token, + data->static_secret_data, data->static_secret_size, cid) != 0) + return NGTCP2_ERR_CALLBACK_FAILURE; + return 0; +} + +/** handle that early data is rejected */ +static void +early_data_is_rejected(struct doq_client_data* data) +{ + int rv; + verbose(1, "early data was rejected by the server"); +#ifdef HAVE_NGTCP2_CONN_TLS_EARLY_DATA_REJECTED + rv = ngtcp2_conn_tls_early_data_rejected(data->conn); +#else + rv = ngtcp2_conn_early_data_rejected(data->conn); +#endif + if(rv != 0) { + 
log_err("ngtcp2_conn_early_data_rejected failed: %s", + ngtcp2_strerror(rv)); + return; + } + /* move the streams back to the start state */ + while(data->query_list_send->first) { + struct doq_client_stream* str = data->query_list_send->first; + /* move it back to the start list */ + stream_list_move(str, data->query_list_send, + data->query_list_start); + str->has_stream = 0; + /* remove stream id */ + str->stream_id = 0; + /* initialise other members, in case they are altered, + * but unlikely, because early streams are rejected. */ + str->nwrite = 0; + str->nread = 0; + str->answer_len = 0; + str->query_is_done = 0; + str->answer_is_complete = 0; + str->query_has_error = 0; + if(str->answer) { + sldns_buffer_free(str->answer); + str->answer = NULL; + } + } +} + +/** the handshake completed callback from ngtcp2 */ +static int +handshake_completed(ngtcp2_conn* ATTR_UNUSED(conn), void* user_data) +{ + struct doq_client_data* data = (struct doq_client_data*)user_data; + verbose(1, "handshake_completed callback"); + verbose(1, "ngtcp2_conn_get_max_data_left is %d", + (int)ngtcp2_conn_get_max_data_left(data->conn)); +#ifdef HAVE_NGTCP2_CONN_GET_MAX_LOCAL_STREAMS_UNI + verbose(1, "ngtcp2_conn_get_max_local_streams_uni is %d", + (int)ngtcp2_conn_get_max_local_streams_uni(data->conn)); +#endif + verbose(1, "ngtcp2_conn_get_streams_uni_left is %d", + (int)ngtcp2_conn_get_streams_uni_left(data->conn)); + verbose(1, "ngtcp2_conn_get_streams_bidi_left is %d", + (int)ngtcp2_conn_get_streams_bidi_left(data->conn)); + verbose(1, "negotiated cipher name is %s", + SSL_get_cipher_name(data->ssl)); + if(verbosity > 0) { + const unsigned char* alpn = NULL; + unsigned int alpnlen = 0; + char alpnstr[128]; + SSL_get0_alpn_selected(data->ssl, &alpn, &alpnlen); + if(alpnlen > sizeof(alpnstr)-1) + alpnlen = sizeof(alpnstr)-1; + memmove(alpnstr, alpn, alpnlen); + alpnstr[alpnlen]=0; + verbose(1, "negotiated ALPN is '%s'", alpnstr); + } + /* The SSL_get_early_data_status call works after 
the handshake + * completes. */ + if(data->early_data_enabled) { + if(SSL_get_early_data_status(data->ssl) != + SSL_EARLY_DATA_ACCEPTED) { + early_data_is_rejected(data); + } else { + verbose(1, "early data was accepted by the server"); + } + } +#ifdef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_CLIENT_CONTEXT + if(data->transport_file) { + early_data_write_transport(data); + } +#endif + return 0; +} + +/** the extend_max_local_streams_bidi callback from ngtcp2 */ +static int +extend_max_local_streams_bidi(ngtcp2_conn* ATTR_UNUSED(conn), + uint64_t max_streams, void* user_data) +{ + struct doq_client_data* data = (struct doq_client_data*)user_data; + verbose(1, "extend_max_local_streams_bidi callback, %d max_streams", + (int)max_streams); + verbose(1, "ngtcp2_conn_get_max_data_left is %d", + (int)ngtcp2_conn_get_max_data_left(data->conn)); +#ifdef HAVE_NGTCP2_CONN_GET_MAX_LOCAL_STREAMS_UNI + verbose(1, "ngtcp2_conn_get_max_local_streams_uni is %d", + (int)ngtcp2_conn_get_max_local_streams_uni(data->conn)); +#endif + verbose(1, "ngtcp2_conn_get_streams_uni_left is %d", + (int)ngtcp2_conn_get_streams_uni_left(data->conn)); + verbose(1, "ngtcp2_conn_get_streams_bidi_left is %d", + (int)ngtcp2_conn_get_streams_bidi_left(data->conn)); + query_streams_start(data); + return 0; +} + +/** the recv_stream_data callback from ngtcp2 */ +static int +recv_stream_data(ngtcp2_conn* ATTR_UNUSED(conn), uint32_t flags, + int64_t stream_id, uint64_t offset, const uint8_t* data, + size_t datalen, void* user_data, void* stream_user_data) +{ + struct doq_client_data* doqdata = (struct doq_client_data*)user_data; + struct doq_client_stream* str = (struct doq_client_stream*) + stream_user_data; + verbose(1, "recv_stream_data stream %d offset %d datalen %d%s%s", + (int)stream_id, (int)offset, (int)datalen, + ((flags&NGTCP2_STREAM_DATA_FLAG_FIN)!=0?" FIN":""), +#ifdef NGTCP2_STREAM_DATA_FLAG_0RTT + ((flags&NGTCP2_STREAM_DATA_FLAG_0RTT)!=0?" 
0RTT":"") +#else + ((flags&NGTCP2_STREAM_DATA_FLAG_EARLY)!=0?" EARLY":"") +#endif + ); + if(verbosity > 0) + log_hex("data", (void*)data, datalen); + if(verbosity > 0) { + char* logs = client_stream_string(str); + verbose(1, "the stream_user_data is %s stream id %d, nread %d", + logs, (int)str->stream_id, (int)str->nread); + free(logs); + } + + /* append the data, if there is data */ + if(datalen > 0) { + client_stream_recv_data(str, data, datalen); + } + if((flags&NGTCP2_STREAM_DATA_FLAG_FIN)!=0) { + client_stream_recv_fin(doqdata, str, 1); + } + ngtcp2_conn_extend_max_stream_offset(doqdata->conn, stream_id, datalen); + ngtcp2_conn_extend_max_offset(doqdata->conn, datalen); + return 0; +} + +/** the stream reset callback from ngtcp2 */ +static int +stream_reset(ngtcp2_conn* ATTR_UNUSED(conn), int64_t stream_id, + uint64_t final_size, uint64_t app_error_code, void* user_data, + void* stream_user_data) +{ + struct doq_client_data* doqdata = (struct doq_client_data*)user_data; + struct doq_client_stream* str = (struct doq_client_stream*) + stream_user_data; + verbose(1, "stream reset for stream %d final size %d app error code %d", + (int)stream_id, (int)final_size, (int)app_error_code); + client_stream_recv_fin(doqdata, str, 0); + return 0; +} + +/** copy sockaddr into ngtcp2 addr */ +static void +copy_ngaddr(struct ngtcp2_addr* ngaddr, struct sockaddr_storage* addr, + socklen_t addrlen) +{ + if(addr_is_ip6(addr, addrlen)) { +#if defined(NGTCP2_USE_GENERIC_SOCKADDR) || defined(NGTCP2_USE_GENERIC_IPV6_SOCKADDR) + struct sockaddr_in* i6 = (struct sockaddr_in6*)addr; + struct ngtcp2_sockaddr_in6 a6; + ngaddr->addr = calloc(1, sizeof(a6)); + if(!ngaddr->addr) fatal_exit("calloc failed: out of memory"); + ngaddr->addrlen = sizeof(a6); + memset(&a6, 0, sizeof(a6)); + a6.sin6_family = i6->sin6_family; + a6.sin6_port = i6->sin6_port; + a6.sin6_flowinfo = i6->sin6_flowinfo; + memmove(&a6.sin6_addr, i6->sin6_addr, sizeof(a6.sin6_addr); + a6.sin6_scope_id = i6->sin6_scope_id; + 
memmove(ngaddr->addr, &a6, sizeof(a6)); +#else + ngaddr->addr = (ngtcp2_sockaddr*)addr; + ngaddr->addrlen = addrlen; +#endif + } else { +#ifdef NGTCP2_USE_GENERIC_SOCKADDR + struct sockaddr_in* i4 = (struct sockaddr_in*)addr; + struct ngtcp2_sockaddr_in a4; + ngaddr->addr = calloc(1, sizeof(a4)); + if(!ngaddr->addr) fatal_exit("calloc failed: out of memory"); + ngaddr->addrlen = sizeof(a4); + memset(&a4, 0, sizeof(a4)); + a4.sin_family = i4->sin_family; + a4.sin_port = i4->sin_port; + memmove(&a4.sin_addr, i4->sin_addr, sizeof(a4.sin_addr); + memmove(ngaddr->addr, &a4, sizeof(a4)); +#else + ngaddr->addr = (ngtcp2_sockaddr*)addr; + ngaddr->addrlen = addrlen; +#endif + } +} + +/** debug log printf for ngtcp2 connections */ +static void log_printf_for_doq(void* ATTR_UNUSED(user_data), + const char* fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + fprintf(stderr, "libngtcp2: "); + vfprintf(stderr, fmt, ap); + va_end(ap); + fprintf(stderr, "\n"); +} + +/** get a timestamp in nanoseconds */ +static ngtcp2_tstamp get_timestamp_nanosec(void) +{ +#ifdef CLOCK_REALTIME + struct timespec tp; + memset(&tp, 0, sizeof(tp)); +#ifdef CLOCK_MONOTONIC + if(clock_gettime(CLOCK_MONOTONIC, &tp) == -1) { +#endif + if(clock_gettime(CLOCK_REALTIME, &tp) == -1) { + log_err("clock_gettime failed: %s", strerror(errno)); + } +#ifdef CLOCK_MONOTONIC + } +#endif + return ((uint64_t)tp.tv_sec)*((uint64_t)1000000000) + + ((uint64_t)tp.tv_nsec); +#else + struct timeval tv; + if(gettimeofday(&tv, NULL) < 0) { + log_err("gettimeofday failed: %s", strerror(errno)); + } + return ((uint64_t)tv.tv_sec)*((uint64_t)1000000000) + + ((uint64_t)tv.tv_usec)*((uint64_t)1000); +#endif /* CLOCK_REALTIME */ +} + +/** create ngtcp2 client connection and set up. 
*/ +static struct ngtcp2_conn* conn_client_setup(struct doq_client_data* data) +{ + struct ngtcp2_conn* conn = NULL; + int rv; + struct ngtcp2_cid dcid, scid; + struct ngtcp2_path path; + uint32_t client_chosen_version = NGTCP2_PROTO_VER_V1; + struct ngtcp2_callbacks cbs; + struct ngtcp2_settings settings; + struct ngtcp2_transport_params params; + + memset(&cbs, 0, sizeof(cbs)); + memset(&settings, 0, sizeof(settings)); + memset(¶ms, 0, sizeof(params)); + memset(&dcid, 0, sizeof(dcid)); + memset(&scid, 0, sizeof(scid)); + memset(&path, 0, sizeof(path)); + + data->quic_version = client_chosen_version; + ngtcp2_settings_default(&settings); + if(str_is_ip6(data->svr)) { +#ifdef HAVE_STRUCT_NGTCP2_SETTINGS_MAX_TX_UDP_PAYLOAD_SIZE + settings.max_tx_udp_payload_size = 1232; +#else + settings.max_udp_payload_size = 1232; +#endif + } + settings.rand_ctx.native_handle = data->rnd; + if(verbosity > 0) { + /* make debug logs */ + settings.log_printf = log_printf_for_doq; + } + settings.initial_ts = get_timestamp_nanosec(); + ngtcp2_transport_params_default(¶ms); + params.initial_max_stream_data_bidi_local = 256*1024; + params.initial_max_stream_data_bidi_remote = 256*1024; + params.initial_max_stream_data_uni = 256*1024; + params.initial_max_data = 1024*1024; + params.initial_max_streams_bidi = 0; + params.initial_max_streams_uni = 100; + params.max_idle_timeout = 30*NGTCP2_SECONDS; + params.active_connection_id_limit = 7; + cid_randfill(&dcid, 16, data->rnd); + cid_randfill(&scid, 16, data->rnd); + cbs.client_initial = ngtcp2_crypto_client_initial_cb; + cbs.recv_crypto_data = ngtcp2_crypto_recv_crypto_data_cb; + cbs.encrypt = ngtcp2_crypto_encrypt_cb; + cbs.decrypt = ngtcp2_crypto_decrypt_cb; + cbs.hp_mask = ngtcp2_crypto_hp_mask_cb; + cbs.recv_retry = ngtcp2_crypto_recv_retry_cb; + cbs.update_key = ngtcp2_crypto_update_key_cb; + cbs.delete_crypto_aead_ctx = ngtcp2_crypto_delete_crypto_aead_ctx_cb; + cbs.delete_crypto_cipher_ctx = + 
ngtcp2_crypto_delete_crypto_cipher_ctx_cb; + cbs.get_path_challenge_data = ngtcp2_crypto_get_path_challenge_data_cb; + cbs.version_negotiation = ngtcp2_crypto_version_negotiation_cb; + cbs.get_new_connection_id = get_new_connection_id_cb; + cbs.handshake_completed = handshake_completed; + cbs.extend_max_local_streams_bidi = extend_max_local_streams_bidi; + cbs.rand = rand_cb; + cbs.recv_stream_data = recv_stream_data; + cbs.stream_reset = stream_reset; + copy_ngaddr(&path.local, &data->local_addr, data->local_addr_len); + copy_ngaddr(&path.remote, &data->dest_addr, data->dest_addr_len); + + rv = ngtcp2_conn_client_new(&conn, &dcid, &scid, &path, + client_chosen_version, &cbs, &settings, ¶ms, + NULL, /* ngtcp2_mem allocator, use default */ + data /* callback argument */); + if(!conn) fatal_exit("could not ngtcp2_conn_client_new: %s", + ngtcp2_strerror(rv)); + data->cc_algo = settings.cc_algo; + return conn; +} + +#ifndef HAVE_NGTCP2_CONN_ENCODE_0RTT_TRANSPORT_PARAMS +/** write the transport file */ +static void +transport_file_write(const char* file, struct ngtcp2_transport_params* params) +{ + FILE* out; + out = fopen(file, "w"); + if(!out) { + perror(file); + return; + } + fprintf(out, "initial_max_streams_bidi=%u\n", + (unsigned)params->initial_max_streams_bidi); + fprintf(out, "initial_max_streams_uni=%u\n", + (unsigned)params->initial_max_streams_uni); + fprintf(out, "initial_max_stream_data_bidi_local=%u\n", + (unsigned)params->initial_max_stream_data_bidi_local); + fprintf(out, "initial_max_stream_data_bidi_remote=%u\n", + (unsigned)params->initial_max_stream_data_bidi_remote); + fprintf(out, "initial_max_stream_data_uni=%u\n", + (unsigned)params->initial_max_stream_data_uni); + fprintf(out, "initial_max_data=%u\n", + (unsigned)params->initial_max_data); + fprintf(out, "active_connection_id_limit=%u\n", + (unsigned)params->active_connection_id_limit); + fprintf(out, "max_datagram_frame_size=%u\n", + (unsigned)params->max_datagram_frame_size); + 
if(ferror(out)) { + verbose(1, "There was an error writing %s: %s", + file, strerror(errno)); + fclose(out); + return; + } + fclose(out); +} +#endif /* HAVE_NGTCP2_CONN_ENCODE_0RTT_TRANSPORT_PARAMS */ + +/** fetch and write the transport file */ +static void +early_data_write_transport(struct doq_client_data* data) +{ +#ifdef HAVE_NGTCP2_CONN_ENCODE_0RTT_TRANSPORT_PARAMS + FILE* out; + uint8_t buf[1024]; + ngtcp2_ssize len = ngtcp2_conn_encode_0rtt_transport_params(data->conn, + buf, sizeof(buf)); + if(len < 0) { + log_err("ngtcp2_conn_encode_0rtt_transport_params failed: %s", + ngtcp2_strerror(len)); + return; + } + out = fopen(data->transport_file, "w"); + if(!out) { + perror(data->transport_file); + return; + } + if(fwrite(buf, 1, len, out) != (size_t)len) { + log_err("fwrite %s failed: %s", data->transport_file, + strerror(errno)); + } + if(ferror(out)) { + verbose(1, "There was an error writing %s: %s", + data->transport_file, strerror(errno)); + } + fclose(out); +#else + struct ngtcp2_transport_params params; + memset(¶ms, 0, sizeof(params)); + ngtcp2_conn_get_remote_transport_params(data->conn, ¶ms); + transport_file_write(data->transport_file, ¶ms); +#endif +} + +#ifndef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_CLIENT_CONTEXT +/** applicatation rx key callback, this is where the rx key is set, + * and streams can be opened, like http3 unidirectional streams, like + * the http3 control and http3 qpack encode and decoder streams. 
*/ +static int +application_rx_key_cb(struct doq_client_data* data) +{ + verbose(1, "application_rx_key_cb callback"); + verbose(1, "ngtcp2_conn_get_max_data_left is %d", + (int)ngtcp2_conn_get_max_data_left(data->conn)); +#ifdef HAVE_NGTCP2_CONN_GET_MAX_LOCAL_STREAMS_UNI + verbose(1, "ngtcp2_conn_get_max_local_streams_uni is %d", + (int)ngtcp2_conn_get_max_local_streams_uni(data->conn)); +#endif + verbose(1, "ngtcp2_conn_get_streams_uni_left is %d", + (int)ngtcp2_conn_get_streams_uni_left(data->conn)); + verbose(1, "ngtcp2_conn_get_streams_bidi_left is %d", + (int)ngtcp2_conn_get_streams_bidi_left(data->conn)); + if(data->transport_file) { + early_data_write_transport(data); + } + return 1; +} + +/** quic_method set_encryption_secrets function */ +static int +set_encryption_secrets(SSL *ssl, OSSL_ENCRYPTION_LEVEL ossl_level, + const uint8_t *read_secret, const uint8_t *write_secret, + size_t secret_len) +{ + struct doq_client_data* data = get_app_data(ssl); +#ifdef HAVE_NGTCP2_ENCRYPTION_LEVEL + ngtcp2_encryption_level +#else + ngtcp2_crypto_level +#endif + level = +#ifdef HAVE_NGTCP2_CRYPTO_QUICTLS_FROM_OSSL_ENCRYPTION_LEVEL + ngtcp2_crypto_quictls_from_ossl_encryption_level(ossl_level); +#else + ngtcp2_crypto_openssl_from_ossl_encryption_level(ossl_level); +#endif + + if(read_secret) { + if(ngtcp2_crypto_derive_and_install_rx_key(data->conn, NULL, + NULL, NULL, level, read_secret, secret_len) != 0) { + log_err("ngtcp2_crypto_derive_and_install_rx_key failed"); + return 0; + } + if(level == NGTCP2_CRYPTO_LEVEL_APPLICATION) { + if(!application_rx_key_cb(data)) + return 0; + } + } + + if(write_secret) { + if(ngtcp2_crypto_derive_and_install_tx_key(data->conn, NULL, + NULL, NULL, level, write_secret, secret_len) != 0) { + log_err("ngtcp2_crypto_derive_and_install_tx_key failed"); + return 0; + } + } + return 1; +} + +/** quic_method add_handshake_data function */ +static int +add_handshake_data(SSL *ssl, OSSL_ENCRYPTION_LEVEL ossl_level, + const uint8_t *data, 
size_t len) +{ + struct doq_client_data* doqdata = get_app_data(ssl); +#ifdef HAVE_NGTCP2_ENCRYPTION_LEVEL + ngtcp2_encryption_level +#else + ngtcp2_crypto_level +#endif + level = +#ifdef HAVE_NGTCP2_CRYPTO_QUICTLS_FROM_OSSL_ENCRYPTION_LEVEL + ngtcp2_crypto_quictls_from_ossl_encryption_level(ossl_level); +#else + ngtcp2_crypto_openssl_from_ossl_encryption_level(ossl_level); +#endif + int rv; + + rv = ngtcp2_conn_submit_crypto_data(doqdata->conn, level, data, len); + if(rv != 0) { + log_err("ngtcp2_conn_submit_crypto_data failed: %s", + ngtcp2_strerror(rv)); + ngtcp2_conn_set_tls_error(doqdata->conn, rv); + return 0; + } + return 1; +} + +/** quic_method flush_flight function */ +static int +flush_flight(SSL* ATTR_UNUSED(ssl)) +{ + return 1; +} + +/** quic_method send_alert function */ +static int +send_alert(SSL *ssl, enum ssl_encryption_level_t ATTR_UNUSED(level), + uint8_t alert) +{ + struct doq_client_data* data = get_app_data(ssl); + data->tls_alert = alert; + return 1; +} +#endif /* HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_CLIENT_CONTEXT */ + +/** new session callback. We can write it to file for resumption later. 
*/ +static int +new_session_cb(SSL* ssl, SSL_SESSION* session) +{ + struct doq_client_data* data = get_app_data(ssl); + BIO *f; + log_assert(data->session_file); + verbose(1, "new session cb: the ssl session max_early_data_size is %u", + (unsigned)SSL_SESSION_get_max_early_data(session)); + f = BIO_new_file(data->session_file, "w"); + if(!f) { + log_err("Could not open %s: %s", data->session_file, + strerror(errno)); + return 0; + } + PEM_write_bio_SSL_SESSION(f, session); + BIO_free(f); + verbose(1, "written tls session to %s", data->session_file); + return 0; +} + +/** setup the TLS context */ +static SSL_CTX* +ctx_client_setup(void) +{ + SSL_CTX* ctx = SSL_CTX_new(TLS_client_method()); + if(!ctx) { + log_crypto_err("Could not SSL_CTX_new"); + exit(1); + } + SSL_CTX_set_min_proto_version(ctx, TLS1_3_VERSION); + SSL_CTX_set_max_proto_version(ctx, TLS1_3_VERSION); + SSL_CTX_set_default_verify_paths(ctx); +#ifdef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_CLIENT_CONTEXT + if(ngtcp2_crypto_quictls_configure_client_context(ctx) != 0) { + log_err("ngtcp2_crypto_quictls_configure_client_context failed"); + exit(1); + } +#else + memset(&quic_method, 0, sizeof(quic_method)); + quic_method.set_encryption_secrets = &set_encryption_secrets; + quic_method.add_handshake_data = &add_handshake_data; + quic_method.flush_flight = &flush_flight; + quic_method.send_alert = &send_alert; + SSL_CTX_set_quic_method(ctx, &quic_method); +#endif + return ctx; +} + + +/* setup the TLS object */ +static SSL* +ssl_client_setup(struct doq_client_data* data) +{ + SSL* ssl = SSL_new(data->ctx); + if(!ssl) { + log_crypto_err("Could not SSL_new"); + exit(1); + } + set_app_data(ssl, data); + SSL_set_connect_state(ssl); + if(!SSL_set_fd(ssl, data->fd)) { + log_crypto_err("Could not SSL_set_fd"); + exit(1); + } + if((data->quic_version & 0xff000000) == 0xff000000) { + SSL_set_quic_use_legacy_codepoint(ssl, 1); + } else { + SSL_set_quic_use_legacy_codepoint(ssl, 0); + } + SSL_set_alpn_protos(ssl, (const 
/**
 * Get packet ecn information.
 * Looks through the control messages of msg for the IPV6_TCLASS message
 * (family AF_INET6) or the IP_TOS message (other families) and returns the
 * first byte of its data.
 * @param msg: the received message header with ancillary data.
 * @param family: address family of the packet.
 * @return the byte from the control message, or 0 if there is none.
 */
static uint32_t
msghdr_get_ecn(struct msghdr* msg, int family)
{
#ifndef S_SPLINT_S
	struct cmsghdr* hdr;
	int want_level = IPPROTO_IP;
	int want_type = IP_TOS;

	if(family == AF_INET6) {
		want_level = IPPROTO_IPV6;
		want_type = IPV6_TCLASS;
	}
	for(hdr = CMSG_FIRSTHDR(msg); hdr != NULL;
		hdr = CMSG_NXTHDR(msg, hdr)) {
		if(hdr->cmsg_level != want_level)
			continue;
		if(hdr->cmsg_type != want_type)
			continue;
		if(hdr->cmsg_len == 0)
			continue;
		return *(uint8_t*)CMSG_DATA(hdr);
	}
	return 0;
#endif /* S_SPLINT_S */
}
IP_TOS ..): %s", + strerror(errno)); + } +} + +/** send a packet */ +static int +doq_client_send_pkt(struct doq_client_data* data, uint32_t ecn, uint8_t* buf, + size_t buf_len, int is_blocked_pkt, int* send_is_blocked) +{ + struct msghdr msg; + struct iovec iov[1]; + ssize_t ret; + iov[0].iov_base = buf; + iov[0].iov_len = buf_len; + memset(&msg, 0, sizeof(msg)); + msg.msg_name = (void*)&data->dest_addr; + msg.msg_namelen = data->dest_addr_len; + msg.msg_iov = iov; + msg.msg_iovlen = 1; + set_ecn(data->fd, data->dest_addr.ss_family, ecn); + + for(;;) { + ret = sendmsg(data->fd, &msg, MSG_DONTWAIT); + if(ret == -1 && errno == EINTR) + continue; + break; + } + if(ret == -1) { + if(errno == EAGAIN) { + if(buf_len > + sldns_buffer_capacity(data->blocked_pkt)) + return 0; /* Cannot store it, but the buffers + are equal length and large enough, so this + should not happen. */ + data->have_blocked_pkt = 1; + if(send_is_blocked) + *send_is_blocked = 1; + /* If we already send the previously blocked packet, + * no need to copy it, otherwise store the packet for + * later. 
*/ + if(!is_blocked_pkt) { + data->blocked_pkt_pi.ecn = ecn; + sldns_buffer_clear(data->blocked_pkt); + sldns_buffer_write(data->blocked_pkt, buf, + buf_len); + sldns_buffer_flip(data->blocked_pkt); + } + return 0; + } + log_err("doq sendmsg: %s", strerror(errno)); +#ifdef HAVE_NGTCP2_CCERR_DEFAULT + ngtcp2_ccerr_set_application_error(&data->ccerr, -1, NULL, 0); +#else + ngtcp2_connection_close_error_set_application_error(&data->last_error, -1, NULL, 0); +#endif + return 0; + } + return 1; +} + +/** change event write on fd to when we have data or when congested */ +static void +event_change_write(struct doq_client_data* data, int do_write) +{ + ub_event_del(data->ev); + if(do_write) { + ub_event_add_bits(data->ev, UB_EV_WRITE); + } else { + ub_event_del_bits(data->ev, UB_EV_WRITE); + } + if(ub_event_add(data->ev, NULL) != 0) { + fatal_exit("could not ub_event_add"); + } +} + +/** write the connection close, with possible error */ +static void +write_conn_close(struct doq_client_data* data) +{ + struct ngtcp2_path_storage ps; + struct ngtcp2_pkt_info pi; + ngtcp2_ssize ret; + if(!data->conn || +#ifdef HAVE_NGTCP2_CONN_IN_CLOSING_PERIOD + ngtcp2_conn_in_closing_period(data->conn) || +#else + ngtcp2_conn_is_in_closing_period(data->conn) || +#endif +#ifdef HAVE_NGTCP2_CONN_IN_DRAINING_PERIOD + ngtcp2_conn_in_draining_period(data->conn) +#else + ngtcp2_conn_is_in_draining_period(data->conn) +#endif + ) + return; + /* Drop blocked packet if there is one, the connection is being + * closed. And thus no further data traffic. */ + data->have_blocked_pkt = 0; + if( +#ifdef HAVE_NGTCP2_CCERR_DEFAULT + data->ccerr.type == NGTCP2_CCERR_TYPE_IDLE_CLOSE +#else + data->last_error.type == + NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_TRANSPORT_IDLE_CLOSE +#endif + ) { + /* do not call ngtcp2_conn_write_connection_close on the + * connection because the ngtcp2_conn_handle_expiry call + * has returned NGTCP2_ERR_IDLE_CLOSE. But continue to close + * the connection. 
*/ + return; + } + verbose(1, "write connection close"); + ngtcp2_path_storage_zero(&ps); + sldns_buffer_clear(data->pkt_buf); + ret = ngtcp2_conn_write_connection_close( + data->conn, &ps.path, &pi, sldns_buffer_begin(data->pkt_buf), + sldns_buffer_remaining(data->pkt_buf), +#ifdef HAVE_NGTCP2_CCERR_DEFAULT + &data->ccerr +#else + &data->last_error +#endif + , get_timestamp_nanosec()); + if(ret < 0) { + log_err("ngtcp2_conn_write_connection_close failed: %s", + ngtcp2_strerror(ret)); + return; + } + verbose(1, "write connection close packet length %d", (int)ret); + if(ret == 0) + return; + doq_client_send_pkt(data, pi.ecn, sldns_buffer_begin(data->pkt_buf), + ret, 0, NULL); +} + +/** disconnect we are done */ +static void +disconnect(struct doq_client_data* data) +{ + verbose(1, "disconnect"); + write_conn_close(data); + ub_event_base_loopexit(data->base); +} + +/** the expire timer callback */ +void doq_client_timer_cb(int ATTR_UNUSED(fd), + short ATTR_UNUSED(bits), void* arg) +{ + struct doq_client_data* data = (struct doq_client_data*)arg; + ngtcp2_tstamp now = get_timestamp_nanosec(); + int rv; + + verbose(1, "doq expire_timer"); + data->expire_timer_added = 0; + rv = ngtcp2_conn_handle_expiry(data->conn, now); + if(rv != 0) { + log_err("ngtcp2_conn_handle_expiry failed: %s", + ngtcp2_strerror(rv)); +#ifdef HAVE_NGTCP2_CCERR_DEFAULT + ngtcp2_ccerr_set_liberr(&data->ccerr, rv, NULL, 0); +#else + ngtcp2_connection_close_error_set_transport_error_liberr( + &data->last_error, rv, NULL, 0); +#endif + disconnect(data); + return; + } + update_timer(data); + on_write(data); +} + +/** update the timers */ +static void +update_timer(struct doq_client_data* data) +{ + ngtcp2_tstamp expiry = ngtcp2_conn_get_expiry(data->conn); + ngtcp2_tstamp now = get_timestamp_nanosec(); + ngtcp2_tstamp t; + struct timeval tv; + + if(expiry <= now) { + /* the timer has already expired, add with zero timeout */ + t = 0; + } else { + t = expiry - now; + } + + /* set the timer */ + 
if(data->expire_timer_added) { + ub_timer_del(data->expire_timer); + data->expire_timer_added = 0; + } + memset(&tv, 0, sizeof(tv)); + tv.tv_sec = t / NGTCP2_SECONDS; + tv.tv_usec = (t / NGTCP2_MICROSECONDS)%1000000; + verbose(1, "update_timer in %d.%6.6d secs", (int)tv.tv_sec, + (int)tv.tv_usec); + if(ub_timer_add(data->expire_timer, data->base, + &doq_client_timer_cb, data, &tv) != 0) { + log_err("timer_add failed: could not add expire timer"); + return; + } + data->expire_timer_added = 1; +} + +/** perform read operations on fd */ +static void +on_read(struct doq_client_data* data) +{ + struct sockaddr_storage addr; + struct iovec iov[1]; + struct msghdr msg; + union { + struct cmsghdr hdr; + char buf[256]; + } ancil; + int i; + ssize_t rcv; + ngtcp2_pkt_info pi; + int rv; + struct ngtcp2_path path; + + for(i=0; i<10; i++) { + msg.msg_name = &addr; + msg.msg_namelen = (socklen_t)sizeof(addr); + iov[0].iov_base = sldns_buffer_begin(data->pkt_buf); + iov[0].iov_len = sldns_buffer_remaining(data->pkt_buf); + msg.msg_iov = iov; + msg.msg_iovlen = 1; + msg.msg_control = ancil.buf; +#ifndef S_SPLINT_S + msg.msg_controllen = sizeof(ancil.buf); +#endif /* S_SPLINT_S */ + msg.msg_flags = 0; + + rcv = recvmsg(data->fd, &msg, MSG_DONTWAIT); + if(rcv == -1) { + if(errno == EINTR || errno == EAGAIN) + break; + log_err_addr("doq recvmsg", strerror(errno), + &data->dest_addr, sizeof(data->dest_addr_len)); + break; + } + + pi.ecn = msghdr_get_ecn(&msg, addr.ss_family); + verbose(1, "recvmsg %d ecn=0x%x", (int)rcv, (int)pi.ecn); + + memset(&path, 0, sizeof(path)); + path.local.addr = (void*)&data->local_addr; + path.local.addrlen = data->local_addr_len; + path.remote.addr = (void*)msg.msg_name; + path.remote.addrlen = msg.msg_namelen; + rv = ngtcp2_conn_read_pkt(data->conn, &path, &pi, + iov[0].iov_base, rcv, get_timestamp_nanosec()); + if(rv != 0) { + log_err("ngtcp2_conn_read_pkt failed: %s", + ngtcp2_strerror(rv)); + if( +#ifdef HAVE_NGTCP2_CCERR_DEFAULT + 
data->ccerr.error_code == 0 +#else + data->last_error.error_code == 0 +#endif + ) { + if(rv == NGTCP2_ERR_CRYPTO) { + /* in picotls the tls alert may need + * to be copied, but this is with + * openssl. And we have the value + * data.tls_alert. */ +#ifdef HAVE_NGTCP2_CCERR_DEFAULT + ngtcp2_ccerr_set_tls_alert( + &data->ccerr, data->tls_alert, + NULL, 0); +#else + ngtcp2_connection_close_error_set_transport_error_tls_alert( + &data->last_error, + data->tls_alert, NULL, 0); +#endif + } else { +#ifdef HAVE_NGTCP2_CCERR_DEFAULT + ngtcp2_ccerr_set_liberr(&data->ccerr, + rv, NULL, 0); +#else + ngtcp2_connection_close_error_set_transport_error_liberr( + &data->last_error, rv, NULL, + 0); +#endif + } + } + disconnect(data); + return; + } + } + + update_timer(data); +} + +/** the write of this query has completed, it has spooled to packets, + * set it to have the write done and move it to the list of receive streams. */ +static void +query_write_is_done(struct doq_client_data* data, + struct doq_client_stream* str) +{ + if(verbosity > 0) { + char* logs = client_stream_string(str); + verbose(1, "query %s write is done", logs); + free(logs); + } + str->write_is_done = 1; + stream_list_move(str, data->query_list_send, data->query_list_receive); +} + +/** write the data streams, if possible */ +static int +write_streams(struct doq_client_data* data) +{ + ngtcp2_path_storage ps; + ngtcp2_tstamp ts = get_timestamp_nanosec(); + struct doq_client_stream* str, *next; + uint32_t flags; + /* number of bytes that can be sent without packet pacing */ + size_t send_quantum = ngtcp2_conn_get_send_quantum(data->conn); + /* Overhead is the stream overhead of adding a header onto the data, + * this make sure the number of bytes to send in data bytes plus + * the overhead overshoots the target quantum by a smaller margin, + * and then it stops sending more bytes. With zero it would overshoot + * more, an accurate number would not overshoot. It is based on the + * stream frame header size. 
*/ + size_t accumulated_send = 0, overhead_stream = 24, overhead_pkt = 60, + max_packet_size = 1200; + size_t num_packets = 0, max_packets = 65535; + ngtcp2_path_storage_zero(&ps); + str = data->query_list_send->first; + + if(data->cc_algo != NGTCP2_CC_ALGO_BBR +#ifdef NGTCP2_CC_ALGO_BBR_V2 + && data->cc_algo != NGTCP2_CC_ALGO_BBR_V2 +#endif +#ifdef NGTCP2_CC_ALGO_BBR2 + && data->cc_algo != NGTCP2_CC_ALGO_BBR2 +#endif + ) { + /* If we do not have a packet pacing congestion control + * algorithm, limit the number of packets. */ + max_packets = 10; + } + + /* loop like this, because at the start, the send list is empty, + * and we want to send handshake packets. But when there is a + * send_list, loop through that. */ + for(;;) { + int64_t stream_id; + ngtcp2_pkt_info pi; + ngtcp2_vec datav[2]; + size_t datav_count = 0; + int fin; + ngtcp2_ssize ret; + ngtcp2_ssize ndatalen = 0; + int send_is_blocked = 0; + + if(str) { + /* pick up next in case this one is deleted */ + next = str->next; + if(verbosity > 0) { + char* logs = client_stream_string(str); + verbose(1, "query %s write stream", logs); + free(logs); + } + stream_id = str->stream_id; + fin = 1; + if(str->nwrite < 2) { + str->data_tcplen = htons(str->data_len); + datav[0].base = ((uint8_t*)&str->data_tcplen)+str->nwrite; + datav[0].len = 2-str->nwrite; + datav[1].base = str->data; + datav[1].len = str->data_len; + datav_count = 2; + } else { + datav[0].base = str->data + (str->nwrite-2); + datav[0].len = str->data_len - (str->nwrite-2); + datav_count = 1; + } + } else { + next = NULL; + verbose(1, "write stream -1."); + stream_id = -1; + fin = 0; + datav[0].base = NULL; + datav[0].len = 0; + datav_count = 1; + } + + /* Does the first data entry fit into the send quantum? */ + /* Check if the data size sent, with a max of one full packet, + * with added stream header and packet header is allowed + * within the send quantum number of bytes. If not, it does + * not fit, and wait. 
*/ + if(accumulated_send == 0 && ((datav_count == 1 && + (datav[0].len>max_packet_size?max_packet_size: + datav[0].len)+overhead_stream+overhead_pkt > + send_quantum) || + (datav_count == 2 && + (datav[0].len+datav[1].len>max_packet_size? + max_packet_size:datav[0].len+datav[1].len) + +overhead_stream+overhead_pkt > send_quantum))) { + /* congestion limited */ + ngtcp2_conn_update_pkt_tx_time(data->conn, ts); + event_change_write(data, 0); + /* update the timer to wait until it is possible to + * write again */ + update_timer(data); + return 0; + } + flags = 0; + if(str && str->next != NULL) { + /* Coalesce more data from more streams into this + * packet, if possible */ + /* There is more than one data entry in this send + * quantum, does the next one fit in the quantum? */ + size_t this_send, possible_next_send; + if(datav_count == 1) + this_send = datav[0].len; + else this_send = datav[0].len + datav[1].len; + if(this_send > max_packet_size) + this_send = max_packet_size; + if(str->next->nwrite < 2) + possible_next_send = (2-str->next->nwrite) + + str->next->data_len; + else possible_next_send = str->next->data_len - + (str->next->nwrite - 2); + if(possible_next_send > max_packet_size) + possible_next_send = max_packet_size; + /* Check if the data lengths that writev returned + * with stream headers added up so far, in + * accumulated_send, with added the data length + * of this send, with a max of one full packet, and + * the data length of the next possible send, with + * a max of one full packet, with a stream header for + * this_send and a stream header for the next possible + * send and a packet header, fit in the send quantum + * number of bytes. If so, ask to add more content + * to the packet with the more flag. 
*/ + if(accumulated_send + this_send + possible_next_send + +2*overhead_stream+ overhead_pkt < send_quantum) + flags |= NGTCP2_WRITE_STREAM_FLAG_MORE; + } + if(fin) { + /* This is the final part of data for this stream */ + flags |= NGTCP2_WRITE_STREAM_FLAG_FIN; + } + sldns_buffer_clear(data->pkt_buf); + ret = ngtcp2_conn_writev_stream(data->conn, &ps.path, &pi, + sldns_buffer_begin(data->pkt_buf), + sldns_buffer_remaining(data->pkt_buf), &ndatalen, + flags, stream_id, datav, datav_count, ts); + if(ret < 0) { + if(ret == NGTCP2_ERR_WRITE_MORE) { + if(str) { + str->nwrite += ndatalen; + if(str->nwrite >= str->data_len+2) + query_write_is_done(data, str); + str = next; + accumulated_send += ndatalen + overhead_stream; + continue; + } + } + log_err("ngtcp2_conn_writev_stream failed: %s", + ngtcp2_strerror(ret)); +#ifdef HAVE_NGTCP2_CCERR_DEFAULT + ngtcp2_ccerr_set_liberr(&data->ccerr, ret, NULL, 0); +#else + ngtcp2_connection_close_error_set_transport_error_liberr( + &data->last_error, ret, NULL, 0); +#endif + disconnect(data); + return 0; + } + verbose(1, "writev_stream pkt size %d ndatawritten %d", + (int)ret, (int)ndatalen); + if(ndatalen >= 0 && str) { + /* add the new write offset */ + str->nwrite += ndatalen; + if(str->nwrite >= str->data_len+2) + query_write_is_done(data, str); + } + if(ret == 0) { + /* congestion limited */ + ngtcp2_conn_update_pkt_tx_time(data->conn, ts); + event_change_write(data, 0); + /* update the timer to wait until it is possible to + * write again */ + update_timer(data); + return 0; + } + if(!doq_client_send_pkt(data, pi.ecn, + sldns_buffer_begin(data->pkt_buf), ret, 0, + &send_is_blocked)) { + if(send_is_blocked) { + /* Blocked packet, wait until it is possible + * to write again and also set a timer. */ + event_change_write(data, 1); + update_timer(data); + return 0; + } + /* Packet could not be sent. Like lost and timeout. 
*/ + ngtcp2_conn_update_pkt_tx_time(data->conn, ts); + event_change_write(data, 0); + update_timer(data); + return 0; + } + /* continue */ + if((size_t)ret >= send_quantum) + break; + send_quantum -= ret; + accumulated_send = 0; + str = next; + if(str == NULL) + break; + if(++num_packets == max_packets) + break; + } + ngtcp2_conn_update_pkt_tx_time(data->conn, ts); + event_change_write(data, 1); + return 1; +} + +/** send the blocked packet now that the stream is writable again. */ +static int +send_blocked_pkt(struct doq_client_data* data) +{ + ngtcp2_tstamp ts = get_timestamp_nanosec(); + int send_is_blocked = 0; + if(!doq_client_send_pkt(data, data->blocked_pkt_pi.ecn, + sldns_buffer_begin(data->pkt_buf), + sldns_buffer_limit(data->pkt_buf), 1, &send_is_blocked)) { + if(send_is_blocked) { + /* Send was blocked, again. Wait, again to retry. */ + event_change_write(data, 1); + /* make sure the timer is set while waiting */ + update_timer(data); + return 0; + } + /* The packed could not be sent. Like it was lost, timeout. */ + data->have_blocked_pkt = 0; + ngtcp2_conn_update_pkt_tx_time(data->conn, ts); + event_change_write(data, 0); + update_timer(data); + return 0; + } + /* The blocked packet has been sent, the holding buffer can be + * cleared. 
*/ + data->have_blocked_pkt = 0; + ngtcp2_conn_update_pkt_tx_time(data->conn, ts); + return 1; +} + +/** perform write operations, if any, on fd */ +static void +on_write(struct doq_client_data* data) +{ + if(data->have_blocked_pkt) { + if(!send_blocked_pkt(data)) + return; + } + if( +#ifdef HAVE_NGTCP2_CONN_IN_CLOSING_PERIOD + ngtcp2_conn_in_closing_period(data->conn) +#else + ngtcp2_conn_is_in_closing_period(data->conn) +#endif + ) + return; + if(!write_streams(data)) + return; + update_timer(data); +} + +/** callback for main listening file descriptor */ +void +doq_client_event_cb(int ATTR_UNUSED(fd), short bits, void* arg) +{ + struct doq_client_data* data = (struct doq_client_data*)arg; + verbose(1, "doq_client_event_cb %s%s%s", + ((bits&UB_EV_READ)!=0?"EV_READ":""), + ((bits&(UB_EV_READ|UB_EV_WRITE))==(UB_EV_READ|UB_EV_WRITE)? + " ":""), + ((bits&UB_EV_WRITE)!=0?"EV_WRITE":"")); + if((bits&UB_EV_READ)) { + on_read(data); + } + /* Perform the write operation anyway. The read operation may + * have produced data, or there is content waiting and it is possible + * to write that. 
*/ + on_write(data); +} + +/** read the TLS session from file */ +static int +early_data_setup_session(struct doq_client_data* data) +{ + SSL_SESSION* session; + BIO* f = BIO_new_file(data->session_file, "r"); + if(f == NULL) { + if(errno == ENOENT) { + verbose(1, "session file %s does not exist", + data->session_file); + return 0; + } + log_err("Could not read %s: %s", data->session_file, + strerror(errno)); + return 0; + } + session = PEM_read_bio_SSL_SESSION(f, NULL, 0, NULL); + if(session == NULL) { + log_crypto_err("Could not read session file with PEM_read_bio_SSL_SESSION"); + BIO_free(f); + return 0; + } + BIO_free(f); + if(!SSL_set_session(data->ssl, session)) { + log_crypto_err("Could not SSL_set_session"); + SSL_SESSION_free(session); + return 0; + } + if(SSL_SESSION_get_max_early_data(session) == 0) { + log_err("TLS session early data is 0"); + SSL_SESSION_free(session); + return 0; + } + SSL_set_quic_early_data_enabled(data->ssl, 1); + SSL_SESSION_free(session); + return 1; +} + +#ifndef HAVE_NGTCP2_CONN_ENCODE_0RTT_TRANSPORT_PARAMS +/** parse one line from the transport file */ +static int +transport_parse_line(struct ngtcp2_transport_params* params, char* line) +{ + if(strncmp(line, "initial_max_streams_bidi=", 25) == 0) { + params->initial_max_streams_bidi = atoi(line+25); + return 1; + } + if(strncmp(line, "initial_max_streams_uni=", 24) == 0) { + params->initial_max_streams_uni = atoi(line+24); + return 1; + } + if(strncmp(line, "initial_max_stream_data_bidi_local=", 35) == 0) { + params->initial_max_stream_data_bidi_local = atoi(line+35); + return 1; + } + if(strncmp(line, "initial_max_stream_data_bidi_remote=", 36) == 0) { + params->initial_max_stream_data_bidi_remote = atoi(line+36); + return 1; + } + if(strncmp(line, "initial_max_stream_data_uni=", 28) == 0) { + params->initial_max_stream_data_uni = atoi(line+28); + return 1; + } + if(strncmp(line, "initial_max_data=", 17) == 0) { + params->initial_max_data = atoi(line+17); + return 1; + } + 
if(strncmp(line, "active_connection_id_limit=", 27) == 0) {
+		params->active_connection_id_limit = atoi(line+27);
+		return 1;
+	}
+	if(strncmp(line, "max_datagram_frame_size=", 24) == 0) {
+		params->max_datagram_frame_size = atoi(line+24);
+		return 1;
+	}
+	return 0;
+}
+#endif /* HAVE_NGTCP2_CONN_ENCODE_0RTT_TRANSPORT_PARAMS */
+
+/**
+ * Setup the early data transport file and read it.
+ * With HAVE_NGTCP2_CONN_ENCODE_0RTT_TRANSPORT_PARAMS the file contents, at
+ * most 1024 bytes, are handed to
+ * ngtcp2_conn_decode_and_set_0rtt_transport_params. Otherwise the file is
+ * read as text, every line is parsed with transport_parse_line and the
+ * result is set with ngtcp2_conn_set_early_remote_transport_params.
+ * @param data: the doq client data, with the transport_file name and conn.
+ * @return 1 when the transport parameters are set on the connection,
+ *	0 when the file does not exist, cannot be read or cannot be parsed.
+ */
+static int
+early_data_setup_transport(struct doq_client_data* data)
+{
+#ifdef HAVE_NGTCP2_CONN_ENCODE_0RTT_TRANSPORT_PARAMS
+	FILE* in;
+	uint8_t buf[1024];
+	size_t len;
+	int rv;
+	in = fopen(data->transport_file, "r");
+	if(!in) {
+		if(errno == ENOENT) {
+			verbose(1, "transport file %s does not exist",
+				data->transport_file);
+			return 0;
+		}
+		perror(data->transport_file);
+		return 0;
+	}
+	len = fread(buf, 1, sizeof(buf), in);
+	if(ferror(in)) {
+		log_err("%s: read failed: %s", data->transport_file,
+			strerror(errno));
+		fclose(in);
+		return 0;
+	}
+	fclose(in);
+	rv = ngtcp2_conn_decode_and_set_0rtt_transport_params(data->conn,
+		buf, len);
+	if(rv != 0) {
+		log_err("ngtcp2_conn_decode_and_set_0rtt_transport_params failed: %s",
+			ngtcp2_strerror(rv));
+		return 0;
+	}
+	return 1;
+#else
+	FILE* in;
+	char buf[1024];
+	size_t nlines = 0;
+	struct ngtcp2_transport_params params;
+	memset(&params, 0, sizeof(params));
+	in = fopen(data->transport_file, "r");
+	if(!in) {
+		if(errno == ENOENT) {
+			verbose(1, "transport file %s does not exist",
+				data->transport_file);
+			return 0;
+		}
+		perror(data->transport_file);
+		return 0;
+	}
+	/* fgets returns NULL both at end of file and on a read error.
+	 * Looping on feof() reports the normal end of file, after a last
+	 * line that ends in a newline, as a read failure. So read until
+	 * NULL and ask ferror() afterwards which of the two it was. */
+	while(fgets(buf, sizeof(buf), in)) {
+		if(!transport_parse_line(&params, buf)) {
+			log_err("%s: could not parse line '%s'",
+				data->transport_file, buf);
+			fclose(in);
+			return 0;
+		}
+		nlines++;
+	}
+	if(ferror(in)) {
+		log_err("%s: read failed: %s", data->transport_file,
+			strerror(errno));
+		fclose(in);
+		return 0;
+	}
+	fclose(in);
+	if(nlines == 0) {
+		/* An empty file has no parameters to resume with, do not
+		 * hand all-zero transport parameters to ngtcp2. */
+		log_err("%s: no transport parameters in file",
+			data->transport_file);
+		return 0;
+	}
+	ngtcp2_conn_set_early_remote_transport_params(data->conn, &params);
+#endif
+	return 1;
+}
+
+/** setup for early data, read the transport file and session file
*/ +static void +early_data_setup(struct doq_client_data* data) +{ + if(!early_data_setup_session(data)) { + verbose(1, "TLS session resumption failed, early data is disabled"); + data->early_data_enabled = 0; + return; + } + if(!early_data_setup_transport(data)) { + verbose(1, "Transport parameters set failed, early data is disabled"); + data->early_data_enabled = 0; + return; + } +} + +/** start the early data transmission */ +static void +early_data_start(struct doq_client_data* data) +{ + query_streams_start(data); + on_write(data); +} + +/** create doq_client_data */ +static struct doq_client_data* +create_doq_client_data(const char* svr, int port, struct ub_event_base* base, + const char* transport_file, const char* session_file, int quiet) +{ + struct doq_client_data* data; + data = calloc(1, sizeof(*data)); + if(!data) fatal_exit("calloc failed: out of memory"); + data->base = base; + data->rnd = ub_initstate(NULL); + if(!data->rnd) fatal_exit("ub_initstate failed: out of memory"); + data->svr = svr; + get_dest_addr(data, svr, port); + data->port = port; + data->quiet = quiet; + data->pkt_buf = sldns_buffer_new(65552); + if(!data->pkt_buf) + fatal_exit("sldns_buffer_new failed: out of memory"); + data->blocked_pkt = sldns_buffer_new(65552); + if(!data->blocked_pkt) + fatal_exit("sldns_buffer_new failed: out of memory"); + data->fd = open_svr_udp(data); + get_local_addr(data); + data->conn = conn_client_setup(data); +#ifdef HAVE_NGTCP2_CCERR_DEFAULT + ngtcp2_ccerr_default(&data->ccerr); +#else + ngtcp2_connection_close_error_default(&data->last_error); +#endif + data->transport_file = transport_file; + data->session_file = session_file; + if(data->transport_file && data->session_file) + data->early_data_enabled = 1; + + generate_static_secret(data, 32); + data->ctx = ctx_client_setup(); + if(data->session_file) { + SSL_CTX_set_session_cache_mode(data->ctx, + SSL_SESS_CACHE_CLIENT | + SSL_SESS_CACHE_NO_INTERNAL_STORE); + SSL_CTX_sess_set_new_cb(data->ctx, 
new_session_cb); + } + data->ssl = ssl_client_setup(data); + ngtcp2_conn_set_tls_native_handle(data->conn, data->ssl); + if(data->early_data_enabled) + early_data_setup(data); + + data->ev = ub_event_new(base, data->fd, UB_EV_READ | UB_EV_WRITE | + UB_EV_PERSIST, doq_client_event_cb, data); + if(!data->ev) { + fatal_exit("could not ub_event_new"); + } + if(ub_event_add(data->ev, NULL) != 0) { + fatal_exit("could not ub_event_add"); + } + data->expire_timer = ub_event_new(data->base, -1, + UB_EV_TIMEOUT, &doq_client_timer_cb, data); + if(!data->expire_timer) + fatal_exit("could not ub_event_new"); + data->query_list_start = stream_list_create(); + data->query_list_send = stream_list_create(); + data->query_list_receive = stream_list_create(); + data->query_list_stop = stream_list_create(); + return data; +} + +/** delete doq_client_data */ +static void +delete_doq_client_data(struct doq_client_data* data) +{ + if(!data) + return; +#if defined(NGTCP2_USE_GENERIC_SOCKADDR) || defined(NGTCP2_USE_GENERIC_IPV6_SOCKADDR) + if(data->conn && data->dest_addr_len != 0) { + if(addr_is_ip6(&data->dest_addr, data->dest_addr_len)) { +# if defined(NGTCP2_USE_GENERIC_SOCKADDR) || defined(NGTCP2_USE_GENERIC_IPV6_SOCKADDR) + const struct ngtcp2_path* path6 = ngtcp2_conn_get_path(data->conn); + free(path6->local.addr); + free(path6->remote.addr); +# endif + } else { +# if defined(NGTCP2_USE_GENERIC_SOCKADDR) + const struct ngtcp2_path* path = ngtcp2_conn_get_path(data->conn); + free(path->local.addr); + free(path->remote.addr); +# endif + } + } +#endif + ngtcp2_conn_del(data->conn); + SSL_free(data->ssl); + sldns_buffer_free(data->pkt_buf); + sldns_buffer_free(data->blocked_pkt); + if(data->fd != -1) + sock_close(data->fd); + SSL_CTX_free(data->ctx); + stream_list_free(data->query_list_start); + stream_list_free(data->query_list_send); + stream_list_free(data->query_list_receive); + stream_list_free(data->query_list_stop); + ub_randfree(data->rnd); + if(data->ev) { + 
ub_event_del(data->ev);
+		ub_event_free(data->ev);
+	}
+	if(data->expire_timer_added)
+		ub_timer_del(data->expire_timer);
+	ub_event_free(data->expire_timer);
+	free(data->static_secret_data);
+	free(data);
+}
+
+/** create the event base that registers events and timers */
+static struct ub_event_base*
+create_event_base(time_t* secs, struct timeval* now)
+{
+	struct ub_event_base* base;
+	const char *evnm="event", *evsys="", *evmethod="";
+
+	memset(now, 0, sizeof(*now));
+	base = ub_default_event_base(1, secs, now);
+	if(!base) fatal_exit("could not create ub_event base");
+
+	ub_get_event_sys(base, &evnm, &evsys, &evmethod);
+	if(verbosity) log_info("%s %s uses %s method", evnm, evsys, evmethod);
+
+	return base;
+}
+
+/** enter a query into the query list */
+static void
+client_enter_query_buf(struct doq_client_data* data, struct sldns_buffer* buf)
+{
+	struct doq_client_stream* str;
+	str = client_stream_create(buf);
+	if(!str)
+		fatal_exit("client_stream_create failed: out of memory");
+	stream_list_append(data->query_list_start, str);
+}
+
+/**
+ * Enter the queries into the query list.
+ * For every query a packet buffer is made, it is printed when verbosity
+ * is enabled, it is entered with client_enter_query_buf and then the
+ * buffer is freed here.
+ * @param data: the doq client data that holds the query lists.
+ * @param qs: the query strings from the command line.
+ * @param count: the number of strings in qs.
+ */
+static void
+client_enter_queries(struct doq_client_data* data, char** qs, int count)
+{
+	int i;
+	/* NOTE(review): the loop header, the declaration of buf and the
+	 * call that creates the query were lost from this text, everything
+	 * between 'i<' and '> 0' was stripped. They are reconstructed here:
+	 * the strings are taken three at a time, presumably qname, qtype
+	 * and qclass, and the helper is presumably make_query. Confirm
+	 * against the original file. */
+	for(i=0; i<count; i+=3) {
+		struct sldns_buffer* buf = NULL;
+		buf = make_query(qs[i], qs[i+1], qs[i+2]);
+		if(verbosity > 0) {
+			char* str;
+			log_buf(1, "send query", buf);
+			str = sldns_wire2str_pkt(sldns_buffer_begin(buf),
+				sldns_buffer_limit(buf));
+			if(!str) verbose(1, "could not sldns_wire2str_pkt");
+			else verbose(1, "send query:\n%s", str);
+			free(str);
+		}
+		client_enter_query_buf(data, buf);
+		sldns_buffer_free(buf);
+	}
+}
+
+/** run the doqclient queries */
+static void run(const char* svr, int port, char** qs, int count,
+	const char* transport_file, const char* session_file, int quiet)
+{
+	time_t secs = 0;
+	struct timeval now;
+	struct ub_event_base* base;
+	struct doq_client_data* data;
+
+	/* setup */
+	base = create_event_base(&secs, &now);
+	data = create_doq_client_data(svr, port, base, transport_file,
+		session_file, quiet);
+	client_enter_queries(data, qs,
count); + if(data->early_data_enabled) + early_data_start(data); + + /* run the queries */ + ub_event_base_dispatch(base); + + /* cleanup */ + delete_doq_client_data(data); + ub_event_base_free(base); +} +#endif /* HAVE_NGTCP2 */ + +#ifdef HAVE_NGTCP2 +/** getopt global, in case header files fail to declare it. */ +extern int optind; +/** getopt global, in case header files fail to declare it. */ +extern char* optarg; +int main(int ATTR_UNUSED(argc), char** ATTR_UNUSED(argv)) +{ + int c; + int port = UNBOUND_DNS_OVER_QUIC_PORT, quiet = 0; + const char* svr = "127.0.0.1", *transport_file = NULL, + *session_file = NULL; +#ifdef USE_WINSOCK + WSADATA wsa_data; + if(WSAStartup(MAKEWORD(2,2), &wsa_data) != 0) { + printf("WSAStartup failed\n"); + return 1; + } +#endif + checklock_set_output_name("ublocktrace-doqclient"); + checklock_start(); + log_init(0, 0, 0); + log_ident_set("doqclient"); + + while((c=getopt(argc, argv, "hp:qs:vx:y:")) != -1) { + switch(c) { + case 'p': + if(atoi(optarg)==0 && strcmp(optarg,"0")!=0) { + printf("error parsing port, " + "number expected: %s\n", optarg); + return 1; + } + port = atoi(optarg); + break; + case 'q': + quiet++; + break; + case 's': + svr = optarg; + break; + case 'v': + verbosity++; + break; + case 'x': + transport_file = optarg; + break; + case 'y': + session_file = optarg; + break; + case 'h': + case '?': + default: + usage(argv); + } + } + + argc -= optind; + argv += optind; + + if(argc%3!=0) { + printf("Invalid input. Specify qname, qtype, and qclass.\n"); + return 1; + } + if(port == 53) { + printf("Error: port number 53 not for DNS over QUIC. Port number 53 is not allowed to be used with DNS over QUIC. 
It is used for DNS datagrams.\n"); + return 1; + } + + run(svr, port, argv, argc, transport_file, session_file, quiet); + + checklock_stop(); +#ifdef USE_WINSOCK + WSACleanup(); +#endif + return 0; +} +#else /* HAVE_NGTCP2 */ +int main(int ATTR_UNUSED(argc), char** ATTR_UNUSED(argv)) +{ + printf("Compiled without ngtcp2 for QUIC, cannot run doqclient.\n"); + return 1; +} +#endif /* HAVE_NGTCP2 */ + +/***--- definitions to make fptr_wlist work. ---***/ +/* These are callbacks, similar to smallapp callbacks, except the debug + * tool callbacks are not in it */ +struct tube; +struct query_info; +#include "util/data/packed_rrset.h" +#include "daemon/worker.h" +#include "daemon/remote.h" +#include "util/fptr_wlist.h" +#include "libunbound/context.h" + +void worker_handle_control_cmd(struct tube* ATTR_UNUSED(tube), + uint8_t* ATTR_UNUSED(buffer), size_t ATTR_UNUSED(len), + int ATTR_UNUSED(error), void* ATTR_UNUSED(arg)) +{ + log_assert(0); +} + +int worker_handle_request(struct comm_point* ATTR_UNUSED(c), + void* ATTR_UNUSED(arg), int ATTR_UNUSED(error), + struct comm_reply* ATTR_UNUSED(repinfo)) +{ + log_assert(0); + return 0; +} + +int worker_handle_service_reply(struct comm_point* ATTR_UNUSED(c), + void* ATTR_UNUSED(arg), int ATTR_UNUSED(error), + struct comm_reply* ATTR_UNUSED(reply_info)) +{ + log_assert(0); + return 0; +} + +int remote_accept_callback(struct comm_point* ATTR_UNUSED(c), + void* ATTR_UNUSED(arg), int ATTR_UNUSED(error), + struct comm_reply* ATTR_UNUSED(repinfo)) +{ + log_assert(0); + return 0; +} + +int remote_control_callback(struct comm_point* ATTR_UNUSED(c), + void* ATTR_UNUSED(arg), int ATTR_UNUSED(error), + struct comm_reply* ATTR_UNUSED(repinfo)) +{ + log_assert(0); + return 0; +} + +void worker_sighandler(int ATTR_UNUSED(sig), void* ATTR_UNUSED(arg)) +{ + log_assert(0); +} + +struct outbound_entry* worker_send_query( + struct query_info* ATTR_UNUSED(qinfo), uint16_t ATTR_UNUSED(flags), + int ATTR_UNUSED(dnssec), int ATTR_UNUSED(want_dnssec), + 
int ATTR_UNUSED(nocaps), int ATTR_UNUSED(check_ratelimit), + struct sockaddr_storage* ATTR_UNUSED(addr), + socklen_t ATTR_UNUSED(addrlen), uint8_t* ATTR_UNUSED(zone), + size_t ATTR_UNUSED(zonelen), int ATTR_UNUSED(tcp_upstream), + int ATTR_UNUSED(ssl_upstream), char* ATTR_UNUSED(tls_auth_name), + struct module_qstate* ATTR_UNUSED(q), int* ATTR_UNUSED(was_ratelimited)) +{ + log_assert(0); + return 0; +} + +#ifdef UB_ON_WINDOWS +void +worker_win_stop_cb(int ATTR_UNUSED(fd), short ATTR_UNUSED(ev), void* + ATTR_UNUSED(arg)) { + log_assert(0); +} + +void +wsvc_cron_cb(void* ATTR_UNUSED(arg)) +{ + log_assert(0); +} +#endif /* UB_ON_WINDOWS */ + +void +worker_alloc_cleanup(void* ATTR_UNUSED(arg)) +{ + log_assert(0); +} + +struct outbound_entry* libworker_send_query( + struct query_info* ATTR_UNUSED(qinfo), uint16_t ATTR_UNUSED(flags), + int ATTR_UNUSED(dnssec), int ATTR_UNUSED(want_dnssec), + int ATTR_UNUSED(nocaps), int ATTR_UNUSED(check_ratelimit), + struct sockaddr_storage* ATTR_UNUSED(addr), + socklen_t ATTR_UNUSED(addrlen), uint8_t* ATTR_UNUSED(zone), + size_t ATTR_UNUSED(zonelen), int ATTR_UNUSED(tcp_upstream), + int ATTR_UNUSED(ssl_upstream), char* ATTR_UNUSED(tls_auth_name), + struct module_qstate* ATTR_UNUSED(q), int* ATTR_UNUSED(was_ratelimited)) +{ + log_assert(0); + return 0; +} + +int libworker_handle_service_reply(struct comm_point* ATTR_UNUSED(c), + void* ATTR_UNUSED(arg), int ATTR_UNUSED(error), + struct comm_reply* ATTR_UNUSED(reply_info)) +{ + log_assert(0); + return 0; +} + +void libworker_handle_control_cmd(struct tube* ATTR_UNUSED(tube), + uint8_t* ATTR_UNUSED(buffer), size_t ATTR_UNUSED(len), + int ATTR_UNUSED(error), void* ATTR_UNUSED(arg)) +{ + log_assert(0); +} + +void libworker_fg_done_cb(void* ATTR_UNUSED(arg), int ATTR_UNUSED(rcode), + struct sldns_buffer* ATTR_UNUSED(buf), enum sec_status ATTR_UNUSED(s), + char* ATTR_UNUSED(why_bogus), int ATTR_UNUSED(was_ratelimited)) +{ + log_assert(0); +} + +void libworker_bg_done_cb(void* ATTR_UNUSED(arg), 
int ATTR_UNUSED(rcode), + struct sldns_buffer* ATTR_UNUSED(buf), enum sec_status ATTR_UNUSED(s), + char* ATTR_UNUSED(why_bogus), int ATTR_UNUSED(was_ratelimited)) +{ + log_assert(0); +} + +void libworker_event_done_cb(void* ATTR_UNUSED(arg), int ATTR_UNUSED(rcode), + struct sldns_buffer* ATTR_UNUSED(buf), enum sec_status ATTR_UNUSED(s), + char* ATTR_UNUSED(why_bogus), int ATTR_UNUSED(was_ratelimited)) +{ + log_assert(0); +} + +int context_query_cmp(const void* ATTR_UNUSED(a), const void* ATTR_UNUSED(b)) +{ + log_assert(0); + return 0; +} + +void worker_stat_timer_cb(void* ATTR_UNUSED(arg)) +{ + log_assert(0); +} + +void worker_probe_timer_cb(void* ATTR_UNUSED(arg)) +{ + log_assert(0); +} + +void worker_start_accept(void* ATTR_UNUSED(arg)) +{ + log_assert(0); +} + +void worker_stop_accept(void* ATTR_UNUSED(arg)) +{ + log_assert(0); +} + +/** keep track of lock id in lock-verify application */ +struct order_id { + /** the thread id that created it */ + int thr; + /** the instance number of creation */ + int instance; +}; + +int order_lock_cmp(const void* e1, const void* e2) +{ + const struct order_id* o1 = e1; + const struct order_id* o2 = e2; + if(o1->thr < o2->thr) return -1; + if(o1->thr > o2->thr) return 1; + if(o1->instance < o2->instance) return -1; + if(o1->instance > o2->instance) return 1; + return 0; +} + +int +codeline_cmp(const void* a, const void* b) +{ + return strcmp(a, b); +} + +int replay_var_compare(const void* ATTR_UNUSED(a), const void* ATTR_UNUSED(b)) +{ + log_assert(0); + return 0; +} + +void remote_get_opt_ssl(char* ATTR_UNUSED(str), void* ATTR_UNUSED(arg)) +{ + log_assert(0); +} diff --git a/testcode/fake_event.c b/testcode/fake_event.c index a517fa5f3..2f60b1381 100644 --- a/testcode/fake_event.c +++ b/testcode/fake_event.c @@ -939,6 +939,11 @@ listen_create(struct comm_base* base, struct listen_port* ATTR_UNUSED(ports), int ATTR_UNUSED(http_notls), struct tcl_list* ATTR_UNUSED(tcp_conn_limit), void* ATTR_UNUSED(sslctx), struct dt_env* 
ATTR_UNUSED(dtenv), + struct doq_table* ATTR_UNUSED(table), + struct ub_randstate* ATTR_UNUSED(rnd), + const char* ATTR_UNUSED(ssl_service_key), + const char* ATTR_UNUSED(ssl_service_pem), + struct config_file* ATTR_UNUSED(cfg), comm_point_callback_type* cb, void *cb_arg) { struct replay_runtime* runtime = (struct replay_runtime*)base; diff --git a/testcode/testbound.c b/testcode/testbound.c index 70feb7972..442e23434 100644 --- a/testcode/testbound.c +++ b/testcode/testbound.c @@ -600,3 +600,52 @@ void listen_desetup_locks(void) { /* nothing */ } + +#ifdef HAVE_NGTCP2 +void comm_point_doq_callback(int ATTR_UNUSED(fd), short ATTR_UNUSED(event), + void* ATTR_UNUSED(arg)) +{ + /* nothing */ +} + +int doq_conn_cmp(const void* ATTR_UNUSED(key1), const void* ATTR_UNUSED(key2)) +{ + return 0; +} + +int doq_conid_cmp(const void* ATTR_UNUSED(key1), const void* ATTR_UNUSED(key2)) +{ + return 0; +} + +int doq_timer_cmp(const void* ATTR_UNUSED(key1), const void* ATTR_UNUSED(key2)) +{ + return 0; +} + +int doq_stream_cmp(const void* ATTR_UNUSED(key1), const void* ATTR_UNUSED(key2)) +{ + return 0; +} + +struct doq_table* doq_table_create(struct config_file* ATTR_UNUSED(cfg), + struct ub_randstate* ATTR_UNUSED(rnd)) +{ + return calloc(1, sizeof(struct doq_table)); +} + +void doq_table_delete(struct doq_table* table) +{ + free(table); +} + +void doq_timer_cb(void* ATTR_UNUSED(arg)) +{ + /* nothing */ +} + +size_t doq_table_quic_size_get(struct doq_table* ATTR_UNUSED(table)) +{ + return 0; +} +#endif diff --git a/testcode/unitdoq.c b/testcode/unitdoq.c new file mode 100644 index 000000000..2b9160970 --- /dev/null +++ b/testcode/unitdoq.c @@ -0,0 +1,84 @@ +/* + * testcode/unitdoq.c - unit test for doq routines. + * + * Copyright (c) 2022, NLnet Labs. All rights reserved. + * + * This software is open source. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * Neither the name of the NLNET LABS nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +/** + * \file + * Calls doq related unit tests. Exits with code 1 on a failure. + */ + +#include "config.h" + +#ifdef HAVE_NGTCP2 + +#include "util/netevent.h" +#include "services/listen_dnsport.h" +#include "testcode/unitmain.h" + +/** check the size of a connection for doq */ +static void +doq_size_conn_check() +{ + /* Printout the size of one doq connection, in memory usage. 
+ * A connection with a couple cids, of type doq_conid, and + * it has one stream, and that has a query and an answer. */ + size_t answer_size = 233; /* size of www.nlnetlabs.nl minimal answer + with dnssec and one A record. The unsigned answer is 176 with + additional data, 61 bytes minimal response one A record. */ + size_t query_size = 45; /* size of query for www.nlnetlabs.nl, with + an EDNS record with DO flag. */ + size_t conn_size = sizeof(struct doq_conn); + size_t conid_size = sizeof(struct doq_conid); + size_t stream_size = sizeof(struct doq_stream); + + conn_size += 16; /* DCID len in the conn key */ + conn_size += 0; /* the size of the ngtcp2_conn */ + conn_size += 0; /* the size of the SSL record */ + conn_size += 0; /* size of the close pkt, + but we do not count it here. Only if the conn gets closed. */ + conid_size += 16; /* the dcid of the conn key */ + conid_size += 16; /* the cid */ + stream_size += query_size; /* size of in buffer */ + stream_size += answer_size; /* size of out buffer */ + printf("doq connection size %u bytes\n", (unsigned)(conn_size + + conid_size*3 + stream_size)); +} + +void doq_test(void) +{ + unit_show_feature("doq"); + doq_size_conn_check(); +} +#endif /* HAVE_NGTCP2 */ diff --git a/testcode/unitmain.c b/testcode/unitmain.c index 9129d722b..653d3efbe 100644 --- a/testcode/unitmain.c +++ b/testcode/unitmain.c @@ -1432,6 +1432,9 @@ main(int argc, char* argv[]) #ifdef CLIENT_SUBNET ecs_test(); #endif /* CLIENT_SUBNET */ +#ifdef HAVE_NGTCP2 + doq_test(); +#endif /* HAVE_NGTCP2 */ if(log_get_lock()) { lock_basic_destroy((lock_basic_type*)log_get_lock()); } diff --git a/testcode/unitmain.h b/testcode/unitmain.h index adcd74f77..99d5240d2 100644 --- a/testcode/unitmain.h +++ b/testcode/unitmain.h @@ -84,5 +84,7 @@ void authzone_test(void); void zonemd_test(void); /** unit test for tcp_reuse functions */ void tcpreuse_test(void); +/** unit test for doq functions */ +void doq_test(void); #endif /* TESTCODE_UNITMAIN_H */ diff --git 
a/testdata/doq_downstream.tdir/doq_downstream.conf b/testdata/doq_downstream.tdir/doq_downstream.conf new file mode 100644 index 000000000..babd35041 --- /dev/null +++ b/testdata/doq_downstream.tdir/doq_downstream.conf @@ -0,0 +1,21 @@ +server: + verbosity: 2 + # num-threads: 1 + interface: 127.0.0.1@@PORT@ + quic-port: @PORT@ + tls-service-key: "unbound_server.key" + tls-service-pem: "unbound_server.pem" + use-syslog: no + directory: . + pidfile: "unbound.pid" + chroot: "" + username: "" + do-not-query-localhost: no + + local-zone: "example.net" static + local-data: "www.example.net. IN A 1.2.3.4" + local-zone: "drop.net" deny + +forward-zone: + name: "." + forward-addr: "127.0.0.1@@TOPORT@" diff --git a/testdata/doq_downstream.tdir/doq_downstream.dsc b/testdata/doq_downstream.tdir/doq_downstream.dsc new file mode 100644 index 000000000..1e0b19d50 --- /dev/null +++ b/testdata/doq_downstream.tdir/doq_downstream.dsc @@ -0,0 +1,16 @@ +BaseName: doq_downstream +Version: 1.0 +Description: Test DNS-over-QUIC query processing +CreationDate: Mon Aug 01 16:00:00 CEST 2022 +Maintainer: +Category: +Component: +CmdDepends: +Depends: +Help: +Pre: doq_downstream.pre +Post: doq_downstream.post +Test: doq_downstream.test +AuxFiles: +Passed: +Failure: diff --git a/testdata/doq_downstream.tdir/doq_downstream.post b/testdata/doq_downstream.tdir/doq_downstream.post new file mode 100644 index 000000000..f1a31be3c --- /dev/null +++ b/testdata/doq_downstream.tdir/doq_downstream.post @@ -0,0 +1,13 @@ +# #-- doq_downstream.post --# +# source the master var file when it's there +[ -f ../.tpkg.var.master ] && source ../.tpkg.var.master +# source the test var file when it's there +[ -f .tpkg.var.test ] && source .tpkg.var.test +# +# do your teardown here +PRE="../.." +. 
../common.sh +kill_pid $FWD_PID +if test -f unbound.pid; then + kill_pid $UNBOUND_PID +fi diff --git a/testdata/doq_downstream.tdir/doq_downstream.pre b/testdata/doq_downstream.tdir/doq_downstream.pre new file mode 100644 index 000000000..f748cc1f5 --- /dev/null +++ b/testdata/doq_downstream.tdir/doq_downstream.pre @@ -0,0 +1,44 @@ +# #-- doq_downstream.pre--# +# source the master var file when it's there +[ -f ../.tpkg.var.master ] && source ../.tpkg.var.master +# use .tpkg.var.test for in test variable passing +[ -f .tpkg.var.test ] && source .tpkg.var.test + +PRE="../.." +. ../common.sh +if grep "define HAVE_NGTCP2 1" $PRE/config.h; then echo test enabled; else skip_test "test skipped"; fi + +if test -f $PRE/unbound_do_valgrind_in_test; then + do_valgrind=yes +else + do_valgrind=no +fi +VALGRIND_FLAGS="--leak-check=full --show-leak-kinds=all" + +get_random_port 2 +UNBOUND_PORT=$RND_PORT +FWD_PORT=$(($RND_PORT + 1)) +echo "UNBOUND_PORT=$UNBOUND_PORT" >> .tpkg.var.test +echo "FWD_PORT=$FWD_PORT" >> .tpkg.var.test + +# start forwarder +get_ldns_testns +$LDNS_TESTNS -p $FWD_PORT doq_downstream.testns >fwd.log 2>&1 & +FWD_PID=$! +echo "FWD_PID=$FWD_PID" >> .tpkg.var.test + +# make config file +sed -e 's/@PORT\@/'$UNBOUND_PORT'/' -e 's/@TOPORT\@/'$FWD_PORT'/' < doq_downstream.conf > ub.conf +# start unbound in the background +if test $do_valgrind = "yes"; then +valgrind $VALGRIND_FLAGS $PRE/unbound -vvvv -d -c ub.conf >unbound.log 2>&1 & +else +$PRE/unbound -vvvv -d -c ub.conf >unbound.log 2>&1 & +fi +UNBOUND_PID=$! 
+echo "UNBOUND_PID=$UNBOUND_PID" >> .tpkg.var.test + +cat .tpkg.var.test +wait_ldns_testns_up fwd.log +wait_unbound_up unbound.log + diff --git a/testdata/doq_downstream.tdir/doq_downstream.test b/testdata/doq_downstream.tdir/doq_downstream.test new file mode 100644 index 000000000..a302e8da6 --- /dev/null +++ b/testdata/doq_downstream.tdir/doq_downstream.test @@ -0,0 +1,109 @@ +# #-- doq_downstream.test --# +# source the master var file when it's there +[ -f ../.tpkg.var.master ] && source ../.tpkg.var.master +# use .tpkg.var.test for in test variable passing +[ -f .tpkg.var.test ] && source .tpkg.var.test + +PRE="../.." +. ../common.sh +get_make +(cd $PRE; $MAKE doqclient) + +# test query from local-data, immediate like from cache +echo "> query www.example.net." +$PRE/doqclient -s 127.0.0.1 -p $UNBOUND_PORT www.example.net. A IN >outfile 2>&1 +ret=$?; cat outfile +if test "$ret" -ne 0; then + echo "exit status not OK" + echo "> cat logfiles" + cat outfile + cat fwd.log + cat unbound.log + echo "Not OK" + exit 1 +fi +if grep "www.example.net" outfile | grep "1.2.3.4"; then + echo "content OK" +else + echo "result contents not OK" + echo "> cat logfiles" + cat outfile + cat fwd.log + cat unbound.log + echo "result contents not OK" + exit 1 +fi +echo "OK" + +# test query that is resolved +echo "> query www.example.com." +$PRE/doqclient -s 127.0.0.1 -p $UNBOUND_PORT www.example.com. A IN >outfile 2>&1 +ret=$?; cat outfile +if test "$ret" -ne 0; then + echo "exit status not OK" + echo "> cat logfiles" + cat outfile + cat fwd.log + cat unbound.log + echo "Not OK" + exit 1 +fi +if grep "www.example.com" outfile | grep "10.20.30.40"; then + echo "content OK" +else + echo "result contents not OK" + echo "> cat logfiles" + cat outfile + cat fwd.log + cat unbound.log + echo "result contents not OK" + exit 1 +fi +echo "OK" + +# Perform the lock verify tests, stop the server first. 
+kill_pid $UNBOUND_PID +cat unbound.log +# Remove pidfile so that the post script does not try to stop the server, +# it is already stopped. +rm -f unbound.pid +if test -f ublocktrace-doqclient.0; then + if $PRE/lock-verify ublocktrace-doqclient.* 2>&1; then + echo "lock-verify test ublocktrace-doqclient worked." + else + echo "lock-verify test ublocktrace-doqclient failed." + exit 1 + fi +fi +if test -f ublocktrace.0; then + if $PRE/lock-verify ublocktrace.* 2>&1; then + echo "lock-verify test ublocktrace worked." + else + echo "lock-verify test ublocktrace failed." + exit 1 + fi + if grep "lock error" unbound.log >/dev/null; then + echo "lock error" + exit 1 + fi +fi +# check valgrind output +if test -f $PRE/unbound_do_valgrind_in_test; then + if grep "All heap blocks were freed -- no leaks are possible" unbound.log; then + : # clean + else + grep "^==" unbound.log + echo "Memory leaked" + grep "in use at exit" unbound.log + exit 1 + fi + if grep "ERROR SUMMARY: 0 errors from 0 contexts" unbound.log; then + : # clean + else + grep "^==" unbound.log + echo "Errors" + grep "ERROR SUMMARY" unbound.log + exit 1 + fi +fi +exit 0 diff --git a/testdata/doq_downstream.tdir/doq_downstream.testns b/testdata/doq_downstream.tdir/doq_downstream.testns new file mode 100644 index 000000000..2d0ea45a4 --- /dev/null +++ b/testdata/doq_downstream.tdir/doq_downstream.testns @@ -0,0 +1,13 @@ +; nameserver test file +$ORIGIN example.com. 
+$TTL 3600 + +ENTRY_BEGIN +MATCH opcode qtype qname +REPLY QR AA NOERROR +ADJUST copy_id +SECTION QUESTION +www IN A +SECTION ANSWER +www IN A 10.20.30.40 +ENTRY_END diff --git a/testdata/doq_downstream.tdir/unbound_server.key b/testdata/doq_downstream.tdir/unbound_server.key new file mode 100644 index 000000000..4256c421d --- /dev/null +++ b/testdata/doq_downstream.tdir/unbound_server.key @@ -0,0 +1,15 @@ +-----BEGIN RSA PRIVATE KEY----- +MIICWwIBAAKBgQC3F7Jsv2u01pLL9rFnjsMU/IaCFUIz/624DcaE84Z4gjMl5kWA +3axQcqul1wlwSrbKwrony+d9hH/+MX0tZwvl8w3OmhmOAiaQ+SHCsIuOjVwQjX0s +RLB61Pz5+PAiVvnPa9JIYB5QrK6DVEsxIHj8MOc5JKORrnESsFDh6yeMeQIDAQAB +AoGAAuWoGBprTOA8UGfl5LqYkaNxSWumsYXxLMFjC8WCsjN1NbtQDDr1uAwodSZS +6ujzvX+ZTHnofs7y64XC8k34HTOCD2zlW7kijWbT8YjRYFU6o9F5zUGD9RCan0ds +sVscT2psLSzfdsmFAcbmnGdxYkXk2PC1FHtaqExxehralGUCQQDcqrg9uQKXlhQi +XAaPr8SiWvtRm2a9IMMZkRfUWZclPHq6fCWNuUaCD+cTat4wAuqeknAz33VEosw3 +fXGsok//AkEA1GjIHXrOcSlpfVJb6NeOBugjRtZ7ZDT5gbtnMS9ob0qntKV6saaL +CNmJwuD9Q3XkU5j1+uHvYGP2NzcJd2CjhwJACV0hNlVMe9w9fHvFN4Gw6WbM9ViP +0oS6YrJafYNTu5vGZXVxLoNnL4u3NYa6aPUmuZXjNwBLfJ8f5VboZPf6RwJAINd2 +oYA8bSi/A755MX4qmozH74r4Fx1Nuq5UHTm8RwDe/0Javx8F/j9MWpJY9lZDEF3l +In5OebPa/NyInSmW/wJAZuP9aRn0nDBkHYri++1A7NykMiJ/nH0mDECbnk+wxx0S +LwqIetBhxb8eQwMg45+iAH7CHAMQ8BQuF/nFE6eotg== +-----END RSA PRIVATE KEY----- diff --git a/testdata/doq_downstream.tdir/unbound_server.pem b/testdata/doq_downstream.tdir/unbound_server.pem new file mode 100644 index 000000000..aeda3ff11 --- /dev/null +++ b/testdata/doq_downstream.tdir/unbound_server.pem @@ -0,0 +1,11 @@ +-----BEGIN CERTIFICATE----- +MIIBmzCCAQQCCQDsNJ1UmphEFzANBgkqhkiG9w0BAQUFADASMRAwDgYDVQQDEwd1 +bmJvdW5kMB4XDTA4MDkxMTA5MDk0MFoXDTI4MDUyOTA5MDk0MFowEjEQMA4GA1UE +AxMHdW5ib3VuZDCBnzANBgkqhkiG9w0BAQEFAAOBjQAwgYkCgYEAtxeybL9rtNaS +y/axZ47DFPyGghVCM/+tuA3GhPOGeIIzJeZFgN2sUHKrpdcJcEq2ysK6J8vnfYR/ +/jF9LWcL5fMNzpoZjgImkPkhwrCLjo1cEI19LESwetT8+fjwIlb5z2vSSGAeUKyu +g1RLMSB4/DDnOSSjka5xErBQ4esnjHkCAwEAATANBgkqhkiG9w0BAQUFAAOBgQAZ 
+9N0lnLENs4JMvPS+mn8C5m9bkkFITd32IiLjf0zgYpIUbFXH6XaEr9GNZBUG8feG +l/6WRXnbnVSblI5odQ4XxGZ9inYY6qtW30uv76HvoKp+QZ1c3460ddR8NauhcCHH +Z7S+QbLXi+r2JAhpPozZCjBHlRD0ixzA1mKQTJhJZg== +-----END CERTIFICATE----- diff --git a/util/config_file.c b/util/config_file.c index 2eb81fcee..879764bd9 100644 --- a/util/config_file.c +++ b/util/config_file.c @@ -135,6 +135,8 @@ config_create(void) cfg->http_query_buffer_size = 4*1024*1024; cfg->http_response_buffer_size = 4*1024*1024; cfg->http_nodelay = 1; + cfg->quic_port = UNBOUND_DNS_OVER_QUIC_PORT; + cfg->quic_size = 8*1024*1024; cfg->use_syslog = 1; cfg->log_identity = NULL; /* changed later with argv[0] */ cfg->log_time_ascii = 0; @@ -604,6 +606,8 @@ int config_set_option(struct config_file* cfg, const char* opt, else S_MEMSIZE("http-response-buffer-size:", http_response_buffer_size) else S_YNO("http-nodelay:", http_nodelay) else S_YNO("http-notls-downstream:", http_notls_downstream) + else S_NUMBER_NONZERO("quic-port:", quic_port) + else S_MEMSIZE("quic-size:", quic_size) else S_YNO("interface-automatic:", if_automatic) else S_STR("interface-automatic-ports:", if_automatic_ports) else S_YNO("use-systemd:", use_systemd) @@ -1154,6 +1158,8 @@ config_get_option(struct config_file* cfg, const char* opt, else O_MEM(opt, "http-response-buffer-size", http_response_buffer_size) else O_YNO(opt, "http-nodelay", http_nodelay) else O_YNO(opt, "http-notls-downstream", http_notls_downstream) + else O_DEC(opt, "quic-port", quic_port) + else O_MEM(opt, "quic-size", quic_size) else O_YNO(opt, "use-systemd", use_systemd) else O_YNO(opt, "do-daemonize", do_daemonize) else O_STR(opt, "chroot", chrootdir) @@ -2821,3 +2827,15 @@ if_is_dnscrypt(const char* ifname, const char* port, int dnscrypt_port) return 0; #endif } + +/** see if interface is quic, its port number == the quic port number */ +int +if_is_quic(const char* ifname, const char* port, int quic_port) +{ + char* p = strchr(ifname, '@'); + if(!p && atoi(port) == quic_port) + return 1; + 
if(p && atoi(p+1) == quic_port) + return 1; + return 0; +} diff --git a/util/config_file.h b/util/config_file.h index fbb09aa62..2969f8433 100644 --- a/util/config_file.h +++ b/util/config_file.h @@ -161,6 +161,11 @@ struct config_file { /** Disable TLS for http sockets downstream */ int http_notls_downstream; + /** port on which to provide DNS over QUIC service */ + int quic_port; + /** size of the quic data, max bytes */ + size_t quic_size; + /** outgoing port range number of ports (per thread) */ int outgoing_num_ports; /** number of outgoing tcp buffers per (per thread) */ @@ -1406,6 +1411,10 @@ int if_is_pp2(const char* ifname, const char* port, /** see if interface is DNSCRYPT, its port number == the dnscrypt port number */ int if_is_dnscrypt(const char* ifname, const char* port, int dnscrypt_port); + +/** see if interface is quic, its port number == the quic port number */ +int if_is_quic(const char* ifname, const char* port, int quic_port); + #ifdef USE_LINUX_IP_LOCAL_PORT_RANGE #define LINUX_IP_LOCAL_PORT_RANGE_PATH "/proc/sys/net/ipv4/ip_local_port_range" #endif diff --git a/util/configlexer.lex b/util/configlexer.lex index dbf2910de..4c0416f73 100644 --- a/util/configlexer.lex +++ b/util/configlexer.lex @@ -269,6 +269,8 @@ http-query-buffer-size{COLON} { YDVAR(1, VAR_HTTP_QUERY_BUFFER_SIZE) } http-response-buffer-size{COLON} { YDVAR(1, VAR_HTTP_RESPONSE_BUFFER_SIZE) } http-nodelay{COLON} { YDVAR(1, VAR_HTTP_NODELAY) } http-notls-downstream{COLON} { YDVAR(1, VAR_HTTP_NOTLS_DOWNSTREAM) } +quic-port{COLON} { YDVAR(1, VAR_QUIC_PORT) } +quic-size{COLON} { YDVAR(1, VAR_QUIC_SIZE) } use-systemd{COLON} { YDVAR(1, VAR_USE_SYSTEMD) } do-daemonize{COLON} { YDVAR(1, VAR_DO_DAEMONIZE) } interface{COLON} { YDVAR(1, VAR_INTERFACE) } diff --git a/util/configparser.y b/util/configparser.y index 2ca16f81c..9978e12f9 100644 --- a/util/configparser.y +++ b/util/configparser.y @@ -203,6 +203,7 @@ extern struct config_parser_state* cfg_parser; %token 
VAR_RPZ_SIGNAL_NXDOMAIN_RA VAR_INTERFACE_AUTOMATIC_PORTS VAR_EDE %token VAR_INTERFACE_ACTION VAR_INTERFACE_VIEW VAR_INTERFACE_TAG %token VAR_INTERFACE_TAG_ACTION VAR_INTERFACE_TAG_DATA +%token VAR_QUIC_PORT VAR_QUIC_SIZE %token VAR_PROXY_PROTOCOL_PORT VAR_STATISTICS_INHIBIT_ZERO %token VAR_HARDEN_UNKNOWN_ADDITIONAL VAR_DISABLE_EDNS_DO VAR_CACHEDB_NO_STORE %token VAR_LOG_DESTADDR VAR_CACHEDB_CHECK_WHEN_SERVE_EXPIRED @@ -342,6 +343,7 @@ content_server: server_num_threads | server_verbosity | server_port | server_edns_client_string_opcode | server_nsid | server_zonemd_permissive_mode | server_max_reuse_tcp_queries | server_tcp_reuse_timeout | server_tcp_auth_query_timeout | + server_quic_port | server_quic_size | server_interface_automatic_ports | server_ede | server_proxy_protocol_port | server_statistics_inhibit_zero | server_harden_unknown_additional | server_disable_edns_do | @@ -1209,6 +1211,21 @@ server_http_notls_downstream: VAR_HTTP_NOTLS_DOWNSTREAM STRING_ARG else cfg_parser->cfg->http_notls_downstream = (strcmp($2, "yes")==0); free($2); }; +server_quic_port: VAR_QUIC_PORT STRING_ARG + { + OUTYY(("P(server_quic_port:%s)\n", $2)); + if(atoi($2) == 0) + yyerror("port number expected"); + else cfg_parser->cfg->quic_port = atoi($2); + free($2); + }; +server_quic_size: VAR_QUIC_SIZE STRING_ARG + { + OUTYY(("P(server_quic_size:%s)\n", $2)); + if(!cfg_parse_memsize($2, &cfg_parser->cfg->quic_size)) + yyerror("memory size expected"); + free($2); + }; server_use_systemd: VAR_USE_SYSTEMD STRING_ARG { OUTYY(("P(server_use_systemd:%s)\n", $2)); diff --git a/util/fptr_wlist.c b/util/fptr_wlist.c index 705dc1bbe..e94ec5bbc 100644 --- a/util/fptr_wlist.c +++ b/util/fptr_wlist.c @@ -47,6 +47,7 @@ #include "util/fptr_wlist.h" #include "util/mini_event.h" #include "services/outside_network.h" +#include "services/listen_dnsport.h" #include "services/mesh.h" #include "services/localzone.h" #include "services/authzone.h" @@ -132,6 +133,9 @@ fptr_whitelist_comm_timer(void 
(*fptr)(void*)) else if(fptr == &worker_stat_timer_cb) return 1; else if(fptr == &worker_probe_timer_cb) return 1; else if(fptr == &validate_suspend_timer_cb) return 1; +#ifdef HAVE_NGTCP2 + else if(fptr == &doq_timer_cb) return 1; +#endif #ifdef UB_ON_WINDOWS else if(fptr == &wsvc_cron_cb) return 1; #endif @@ -181,6 +185,9 @@ fptr_whitelist_event(void (*fptr)(int, short, void *)) else if(fptr == &tube_handle_signal) return 1; else if(fptr == &comm_base_handle_slow_accept) return 1; else if(fptr == &comm_point_http_handle_callback) return 1; +#ifdef HAVE_NGTCP2 + else if(fptr == &comm_point_doq_callback) return 1; +#endif #ifdef USE_DNSTAP else if(fptr == &dtio_output_cb) return 1; else if(fptr == &dtio_cmd_cb) return 1; @@ -190,6 +197,10 @@ fptr_whitelist_event(void (*fptr)(int, short, void *)) else if(fptr == &dtio_tap_callback) return 1; else if(fptr == &dtio_mainfdcallback) return 1; #endif +#ifdef HAVE_NGTCP2 + else if(fptr == &doq_client_event_cb) return 1; + else if(fptr == &doq_client_timer_cb) return 1; +#endif #ifdef UB_ON_WINDOWS else if(fptr == &worker_win_stop_cb) return 1; #endif @@ -248,6 +259,12 @@ fptr_whitelist_rbtree_cmp(int (*fptr) (const void *, const void *)) else if(fptr == &auth_zone_cmp) return 1; else if(fptr == &auth_data_cmp) return 1; else if(fptr == &auth_xfer_cmp) return 1; +#ifdef HAVE_NGTCP2 + else if(fptr == &doq_conn_cmp) return 1; + else if(fptr == &doq_conid_cmp) return 1; + else if(fptr == &doq_timer_cmp) return 1; + else if(fptr == &doq_stream_cmp) return 1; +#endif return 0; } diff --git a/util/locks.h b/util/locks.h index d86ee4923..eb698cb75 100644 --- a/util/locks.h +++ b/util/locks.h @@ -88,6 +88,7 @@ #define lock_get_mem(lock) (0) /* nothing */ #define checklock_start() /* nop */ #define checklock_stop() /* nop */ +#define checklock_set_output_name(name) /* nop */ #ifdef HAVE_PTHREAD #include diff --git a/util/netevent.c b/util/netevent.c index 9d5131da9..55ea4e9aa 100644 --- a/util/netevent.c +++ b/util/netevent.c @@ 
-53,6 +53,7 @@ #include "dnstap/dnstap.h" #include "dnscrypt/dnscrypt.h" #include "services/listen_dnsport.h" +#include "util/random.h" #ifdef HAVE_SYS_TYPES_H #include <sys/types.h> #endif @@ -72,9 +73,16 @@ #ifdef HAVE_OPENSSL_ERR_H #include <openssl/err.h> #endif + +#ifdef HAVE_NGTCP2 +#include <ngtcp2/ngtcp2.h> +#include <ngtcp2/ngtcp2_crypto.h> +#endif + #ifdef HAVE_LINUX_NET_TSTAMP_H #include <linux/net_tstamp.h> #endif + /* -------- Start of local definitions -------- */ /** if CMSG_ALIGN is not defined on this platform, a workaround */ #ifndef CMSG_ALIGN @@ -1160,6 +1168,1774 @@ comm_point_udp_callback(int fd, short event, void* arg) } } +#ifdef HAVE_NGTCP2 +void +doq_pkt_addr_init(struct doq_pkt_addr* paddr) +{ + paddr->addrlen = (socklen_t)sizeof(paddr->addr); + paddr->localaddrlen = (socklen_t)sizeof(paddr->localaddr); + paddr->ifindex = 0; +} + +/** set the ecn on the transmission */ +static void +doq_set_ecn(int fd, int family, uint32_t ecn) +{ + unsigned int val = ecn; + if(family == AF_INET6) { + if(setsockopt(fd, IPPROTO_IPV6, IPV6_TCLASS, &val, + (socklen_t)sizeof(val)) == -1) { + log_err("setsockopt(.. IPV6_TCLASS ..): %s", + strerror(errno)); + } + return; + } + if(setsockopt(fd, IPPROTO_IP, IP_TOS, &val, + (socklen_t)sizeof(val)) == -1) { + log_err("setsockopt(.. 
IP_TOS ..): %s", + strerror(errno)); + } +} + +/** set the local address in the control ancillary data */ +static void +doq_set_localaddr_cmsg(struct msghdr* msg, size_t control_size, + struct doq_addr_storage* localaddr, socklen_t localaddrlen, + int ifindex) +{ +#ifndef S_SPLINT_S + struct cmsghdr* cmsg; +#endif /* S_SPLINT_S */ +#ifndef S_SPLINT_S + cmsg = CMSG_FIRSTHDR(msg); + if(localaddr->sockaddr.in.sin_family == AF_INET) { +#ifdef IP_PKTINFO + struct sockaddr_in* sa = (struct sockaddr_in*)localaddr; + struct in_pktinfo v4info; + log_assert(localaddrlen >= sizeof(struct sockaddr_in)); + msg->msg_controllen = CMSG_SPACE(sizeof(struct in_pktinfo)); + memset(msg->msg_control, 0, msg->msg_controllen); + log_assert(msg->msg_controllen <= control_size); + cmsg->cmsg_level = IPPROTO_IP; + cmsg->cmsg_type = IP_PKTINFO; + memset(&v4info, 0, sizeof(v4info)); +# ifdef HAVE_STRUCT_IN_PKTINFO_IPI_SPEC_DST + memmove(&v4info.ipi_spec_dst, &sa->sin_addr, + sizeof(struct in_addr)); +# else + memmove(&v4info.ipi_addr, &sa->sin_addr, + sizeof(struct in_addr)); +# endif + v4info.ipi_ifindex = ifindex; + memmove(CMSG_DATA(cmsg), &v4info, sizeof(struct in_pktinfo)); + cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo)); +#elif defined(IP_SENDSRCADDR) + struct sockaddr_in* sa= (struct sockaddr_in*)localaddr; + log_assert(localaddrlen >= sizeof(struct sockaddr_in)); + msg->msg_controllen = CMSG_SPACE(sizeof(struct in_addr)); + memset(msg->msg_control, 0, msg->msg_controllen); + log_assert(msg->msg_controllen <= control_size); + cmsg->cmsg_level = IPPROTO_IP; + cmsg->cmsg_type = IP_SENDSRCADDR; + memmove(CMSG_DATA(cmsg), &sa->sin_addr, + sizeof(struct in_addr)); + cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_addr)); +#endif + } else { + struct sockaddr_in6* sa6 = (struct sockaddr_in6*)localaddr; + struct in6_pktinfo v6info; + log_assert(localaddrlen >= sizeof(struct sockaddr_in6)); + msg->msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo)); + memset(msg->msg_control, 0, 
msg->msg_controllen); + log_assert(msg->msg_controllen <= control_size); + cmsg->cmsg_level = IPPROTO_IPV6; + cmsg->cmsg_type = IPV6_PKTINFO; + memset(&v6info, 0, sizeof(v6info)); + memmove(&v6info.ipi6_addr, &sa6->sin6_addr, + sizeof(struct in6_addr)); + v6info.ipi6_ifindex = ifindex; + memmove(CMSG_DATA(cmsg), &v6info, sizeof(struct in6_pktinfo)); + cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo)); + } +#endif /* S_SPLINT_S */ + /* Ignore unused variables, if no assertions are compiled. */ + (void)localaddrlen; + (void)control_size; +} + +/** write address and port into strings */ +static int +doq_print_addr_port(struct doq_addr_storage* addr, socklen_t addrlen, + char* host, size_t hostlen, char* port, size_t portlen) +{ + if(addr->sockaddr.in.sin_family == AF_INET) { + struct sockaddr_in* sa = (struct sockaddr_in*)addr; + log_assert(addrlen >= sizeof(*sa)); + if(inet_ntop(sa->sin_family, &sa->sin_addr, host, + (socklen_t)hostlen) == 0) { + log_hex("inet_ntop error: address", &sa->sin_addr, + sizeof(sa->sin_addr)); + return 0; + } + snprintf(port, portlen, "%u", (unsigned)ntohs(sa->sin_port)); + } else if(addr->sockaddr.in.sin_family == AF_INET6) { + struct sockaddr_in6* sa6 = (struct sockaddr_in6*)addr; + log_assert(addrlen >= sizeof(*sa6)); + if(inet_ntop(sa6->sin6_family, &sa6->sin6_addr, host, + (socklen_t)hostlen) == 0) { + log_hex("inet_ntop error: address", &sa6->sin6_addr, + sizeof(sa6->sin6_addr)); + return 0; + } + snprintf(port, portlen, "%u", (unsigned)ntohs(sa6->sin6_port)); + } + return 1; +} + +/** doq store the blocked packet when write has blocked */ +static void +doq_store_blocked_pkt(struct comm_point* c, struct doq_pkt_addr* paddr, + uint32_t ecn) +{ + if(c->doq_socket->have_blocked_pkt) + return; /* should not happen that we write when there is + already a blocked write, but if so, drop it. 
*/ + if(sldns_buffer_limit(c->doq_socket->pkt_buf) > + sldns_buffer_capacity(c->doq_socket->blocked_pkt)) + return; /* impossibly large, drop packet. impossible because + pkt_buf and blocked_pkt are the same size. */ + c->doq_socket->have_blocked_pkt = 1; + c->doq_socket->blocked_pkt_pi.ecn = ecn; + memcpy(c->doq_socket->blocked_paddr, paddr, + sizeof(*c->doq_socket->blocked_paddr)); + sldns_buffer_clear(c->doq_socket->blocked_pkt); + sldns_buffer_write(c->doq_socket->blocked_pkt, + sldns_buffer_begin(c->doq_socket->pkt_buf), + sldns_buffer_limit(c->doq_socket->pkt_buf)); + sldns_buffer_flip(c->doq_socket->blocked_pkt); +} + +void +doq_send_pkt(struct comm_point* c, struct doq_pkt_addr* paddr, uint32_t ecn) +{ + struct msghdr msg; + struct iovec iov[1]; + union { + struct cmsghdr hdr; + char buf[256]; + } control; + ssize_t ret; + iov[0].iov_base = sldns_buffer_begin(c->doq_socket->pkt_buf); + iov[0].iov_len = sldns_buffer_limit(c->doq_socket->pkt_buf); + memset(&msg, 0, sizeof(msg)); + msg.msg_name = (void*)&paddr->addr; + msg.msg_namelen = paddr->addrlen; + msg.msg_iov = iov; + msg.msg_iovlen = 1; + msg.msg_control = control.buf; +#ifndef S_SPLINT_S + msg.msg_controllen = sizeof(control.buf); +#endif /* S_SPLINT_S */ + msg.msg_flags = 0; + + doq_set_localaddr_cmsg(&msg, sizeof(control.buf), &paddr->localaddr, + paddr->localaddrlen, paddr->ifindex); + doq_set_ecn(c->fd, paddr->addr.sockaddr.in.sin_family, ecn); + + for(;;) { + ret = sendmsg(c->fd, &msg, MSG_DONTWAIT); + if(ret == -1 && errno == EINTR) + continue; + break; + } + if(ret == -1) { +#ifndef USE_WINSOCK + if(errno == EAGAIN || +# ifdef EWOULDBLOCK + errno == EWOULDBLOCK || +# endif + errno == ENOBUFS) +#else + if(WSAGetLastError() == WSAEINPROGRESS || + WSAGetLastError() == WSAENOBUFS || + WSAGetLastError() == WSAEWOULDBLOCK) +#endif + { + /* udp send has blocked */ + doq_store_blocked_pkt(c, paddr, ecn); + return; + } + if(!udp_send_errno_needs_log((void*)&paddr->addr, + paddr->addrlen)) + return; + 
if(verbosity >= VERB_OPS) { + char host[256], port[32]; + if(doq_print_addr_port(&paddr->addr, paddr->addrlen, + host, sizeof(host), port, sizeof(port))) { + verbose(VERB_OPS, "doq sendmsg to %s %s " + "failed: %s", host, port, + strerror(errno)); + } else { + verbose(VERB_OPS, "doq sendmsg failed: %s", + strerror(errno)); + } + } + return; + } else if(ret != (ssize_t)sldns_buffer_limit(c->doq_socket->pkt_buf)) { + char host[256], port[32]; + if(doq_print_addr_port(&paddr->addr, paddr->addrlen, host, + sizeof(host), port, sizeof(port))) { + log_err("doq sendmsg to %s %s failed: " + "sent %d in place of %d bytes", + host, port, (int)ret, + (int)sldns_buffer_limit(c->doq_socket->pkt_buf)); + } else { + log_err("doq sendmsg failed: " + "sent %d in place of %d bytes", + (int)ret, (int)sldns_buffer_limit(c->doq_socket->pkt_buf)); + } + return; + } +} + +/** fetch port number */ +static int +doq_sockaddr_get_port(struct doq_addr_storage* addr) +{ + if(addr->sockaddr.in.sin_family == AF_INET) { + struct sockaddr_in* sa = (struct sockaddr_in*)addr; + return ntohs(sa->sin_port); + } else if(addr->sockaddr.in.sin_family == AF_INET6) { + struct sockaddr_in6* sa6 = (struct sockaddr_in6*)addr; + return ntohs(sa6->sin6_port); + } + return 0; +} + +/** get local address from ancillary data headers */ +static int +doq_get_localaddr_cmsg(struct comm_point* c, struct doq_pkt_addr* paddr, + int* pkt_continue, struct msghdr* msg) +{ +#ifndef S_SPLINT_S + struct cmsghdr* cmsg; +#endif /* S_SPLINT_S */ + + memset(&paddr->localaddr, 0, sizeof(paddr->localaddr)); +#ifndef S_SPLINT_S + for(cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL; + cmsg = CMSG_NXTHDR(msg, cmsg)) { + if( cmsg->cmsg_level == IPPROTO_IPV6 && + cmsg->cmsg_type == IPV6_PKTINFO) { + struct in6_pktinfo* v6info = + (struct in6_pktinfo*)CMSG_DATA(cmsg); + struct sockaddr_in6* sa= (struct sockaddr_in6*) + &paddr->localaddr; + struct sockaddr_in6* rema = (struct sockaddr_in6*) + &paddr->addr; + if(rema->sin6_family != AF_INET6) { + 
log_err("doq cmsg family mismatch cmsg is ip6"); + *pkt_continue = 1; + return 0; + } + sa->sin6_family = AF_INET6; + sa->sin6_port = htons(doq_sockaddr_get_port( + (void*)c->socket->addr)); + paddr->ifindex = v6info->ipi6_ifindex; + memmove(&sa->sin6_addr, &v6info->ipi6_addr, + sizeof(struct in6_addr)); + paddr->localaddrlen = sizeof(struct sockaddr_in6); + break; +#ifdef IP_PKTINFO + } else if( cmsg->cmsg_level == IPPROTO_IP && + cmsg->cmsg_type == IP_PKTINFO) { + struct in_pktinfo* v4info = + (struct in_pktinfo*)CMSG_DATA(cmsg); + struct sockaddr_in* sa= (struct sockaddr_in*) + &paddr->localaddr; + struct sockaddr_in* rema = (struct sockaddr_in*) + &paddr->addr; + if(rema->sin_family != AF_INET) { + log_err("doq cmsg family mismatch cmsg is ip4"); + *pkt_continue = 1; + return 0; + } + sa->sin_family = AF_INET; + sa->sin_port = htons(doq_sockaddr_get_port( + (void*)c->socket->addr)); + paddr->ifindex = v4info->ipi_ifindex; + memmove(&sa->sin_addr, &v4info->ipi_addr, + sizeof(struct in_addr)); + paddr->localaddrlen = sizeof(struct sockaddr_in); + break; +#elif defined(IP_RECVDSTADDR) + } else if( cmsg->cmsg_level == IPPROTO_IP && + cmsg->cmsg_type == IP_RECVDSTADDR) { + struct sockaddr_in* sa= (struct sockaddr_in*) + &paddr->localaddr; + struct sockaddr_in* rema = (struct sockaddr_in*) + &paddr->addr; + if(rema->sin_family != AF_INET) { + log_err("doq cmsg family mismatch cmsg is ip4"); + *pkt_continue = 1; + return 0; + } + sa->sin_family = AF_INET; + sa->sin_port = htons(doq_sockaddr_get_port( + (void*)c->socket->addr)); + paddr->ifindex = 0; + memmove(&sa->sin_addr, CMSG_DATA(cmsg), + sizeof(struct in_addr)); + paddr->localaddrlen = sizeof(struct sockaddr_in); + break; +#endif /* IP_PKTINFO or IP_RECVDSTADDR */ + } + } +#endif /* S_SPLINT_S */ + +return 1; +} + +/** get packet ecn information */ +static uint32_t +msghdr_get_ecn(struct msghdr* msg, int family) +{ +#ifndef S_SPLINT_S + struct cmsghdr* cmsg; + if(family == AF_INET6) { + for(cmsg = 
CMSG_FIRSTHDR(msg); cmsg != NULL; + cmsg = CMSG_NXTHDR(msg, cmsg)) { + if(cmsg->cmsg_level == IPPROTO_IPV6 && + cmsg->cmsg_type == IPV6_TCLASS && + cmsg->cmsg_len != 0) { + uint8_t* ecn = (uint8_t*)CMSG_DATA(cmsg); + return *ecn; + } + } + return 0; + } + for(cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL; + cmsg = CMSG_NXTHDR(msg, cmsg)) { + if(cmsg->cmsg_level == IPPROTO_IP && + cmsg->cmsg_type == IP_TOS && + cmsg->cmsg_len != 0) { + uint8_t* ecn = (uint8_t*)CMSG_DATA(cmsg); + return *ecn; + } + } +#endif /* S_SPLINT_S */ + return 0; +} + +/** receive packet for DoQ on UDP. get ancillary data for addresses, + * return false if failed and the callback can stop receiving UDP packets + * if pkt_continue is false. */ +static int +doq_recv(struct comm_point* c, struct doq_pkt_addr* paddr, int* pkt_continue, + struct ngtcp2_pkt_info* pi) +{ + struct msghdr msg; + struct iovec iov[1]; + ssize_t rcv; + union { + struct cmsghdr hdr; + char buf[256]; + } ancil; + + msg.msg_name = &paddr->addr; + msg.msg_namelen = (socklen_t)sizeof(paddr->addr); + iov[0].iov_base = sldns_buffer_begin(c->doq_socket->pkt_buf); + iov[0].iov_len = sldns_buffer_remaining(c->doq_socket->pkt_buf); + msg.msg_iov = iov; + msg.msg_iovlen = 1; + msg.msg_control = ancil.buf; +#ifndef S_SPLINT_S + msg.msg_controllen = sizeof(ancil.buf); +#endif /* S_SPLINT_S */ + msg.msg_flags = 0; + + rcv = recvmsg(c->fd, &msg, MSG_DONTWAIT); + if(rcv == -1) { + if(errno != EAGAIN && errno != EINTR + && udp_recv_needs_log(errno)) { + log_err("recvmsg failed for doq: %s", strerror(errno)); + } + *pkt_continue = 0; + return 0; + } + + paddr->addrlen = msg.msg_namelen; + sldns_buffer_skip(c->doq_socket->pkt_buf, rcv); + sldns_buffer_flip(c->doq_socket->pkt_buf); + if(!doq_get_localaddr_cmsg(c, paddr, pkt_continue, &msg)) + return 0; + pi->ecn = msghdr_get_ecn(&msg, paddr->addr.sockaddr.in.sin_family); + return 1; +} + +/** send the version negotiation for doq. scid and dcid are flipped around + * to send back to the client. 
*/ +static void +doq_send_version_negotiation(struct comm_point* c, struct doq_pkt_addr* paddr, + const uint8_t* dcid, size_t dcidlen, const uint8_t* scid, + size_t scidlen) +{ + uint32_t versions[2]; + size_t versions_len = 0; + ngtcp2_ssize ret; + uint8_t unused_random; + + /* fill the array with supported versions */ + versions[0] = NGTCP2_PROTO_VER_V1; + versions_len = 1; + unused_random = ub_random_max(c->doq_socket->rnd, 256); + sldns_buffer_clear(c->doq_socket->pkt_buf); + ret = ngtcp2_pkt_write_version_negotiation( + sldns_buffer_begin(c->doq_socket->pkt_buf), + sldns_buffer_capacity(c->doq_socket->pkt_buf), unused_random, + dcid, dcidlen, scid, scidlen, versions, versions_len); + if(ret < 0) { + log_err("ngtcp2_pkt_write_version_negotiation failed: %s", + ngtcp2_strerror(ret)); + return; + } + sldns_buffer_set_position(c->doq_socket->pkt_buf, ret); + sldns_buffer_flip(c->doq_socket->pkt_buf); + doq_send_pkt(c, paddr, 0); +} + +/** Find the doq_conn object by remote address and dcid */ +static struct doq_conn* +doq_conn_find(struct doq_table* table, struct doq_addr_storage* addr, + socklen_t addrlen, struct doq_addr_storage* localaddr, + socklen_t localaddrlen, int ifindex, const uint8_t* dcid, + size_t dcidlen) +{ + struct rbnode_type* node; + struct doq_conn key; + memset(&key.node, 0, sizeof(key.node)); + key.node.key = &key; + memmove(&key.key.paddr.addr, addr, addrlen); + key.key.paddr.addrlen = addrlen; + memmove(&key.key.paddr.localaddr, localaddr, localaddrlen); + key.key.paddr.localaddrlen = localaddrlen; + key.key.paddr.ifindex = ifindex; + key.key.dcid = (void*)dcid; + key.key.dcidlen = dcidlen; + node = rbtree_search(table->conn_tree, &key); + if(node) + return (struct doq_conn*)node->key; + return NULL; +} + +/** find the doq_con by the connection id */ +static struct doq_conn* +doq_conn_find_by_id(struct doq_table* table, const uint8_t* dcid, + size_t dcidlen) +{ + struct doq_conid* conid; + lock_rw_rdlock(&table->conid_lock); + conid = 
doq_conid_find(table, dcid, dcidlen); + if(conid) { + /* make a copy of the key */ + struct doq_conn* conn; + struct doq_conn_key key = conid->key; + uint8_t cid[NGTCP2_MAX_CIDLEN]; + log_assert(conid->key.dcidlen <= NGTCP2_MAX_CIDLEN); + memcpy(cid, conid->key.dcid, conid->key.dcidlen); + key.dcid = cid; + lock_rw_unlock(&table->conid_lock); + + /* now that the conid lock is released, look up the conn */ + lock_rw_rdlock(&table->lock); + conn = doq_conn_find(table, &key.paddr.addr, + key.paddr.addrlen, &key.paddr.localaddr, + key.paddr.localaddrlen, key.paddr.ifindex, key.dcid, + key.dcidlen); + if(!conn) { + /* The connection got deleted between the conid lookup + * and the connection lock grab, it no longer exists, + * so return null. */ + lock_rw_unlock(&table->lock); + return NULL; + } + lock_basic_lock(&conn->lock); + if(conn->is_deleted) { + lock_rw_unlock(&table->lock); + lock_basic_unlock(&conn->lock); + return NULL; + } + lock_rw_unlock(&table->lock); + return conn; + } + lock_rw_unlock(&table->conid_lock); + return NULL; +} + +/** Find the doq_conn, by addr or by connection id */ +static struct doq_conn* +doq_conn_find_by_addr_or_cid(struct doq_table* table, + struct doq_pkt_addr* paddr, const uint8_t* dcid, size_t dcidlen) +{ + struct doq_conn* conn; + lock_rw_rdlock(&table->lock); + conn = doq_conn_find(table, &paddr->addr, paddr->addrlen, + &paddr->localaddr, paddr->localaddrlen, paddr->ifindex, + dcid, dcidlen); + if(conn && conn->is_deleted) { + conn = NULL; + } + if(conn) { + lock_basic_lock(&conn->lock); + lock_rw_unlock(&table->lock); + verbose(VERB_ALGO, "doq: found connection by address, dcid"); + } else { + lock_rw_unlock(&table->lock); + conn = doq_conn_find_by_id(table, dcid, dcidlen); + if(conn) { + verbose(VERB_ALGO, "doq: found connection by dcid"); + } + } + return conn; +} + +/** decode doq packet header, false on handled or failure, true to continue + * to process the packet */ +static int +doq_decode_pkt_header_negotiate(struct 
comm_point* c, + struct doq_pkt_addr* paddr, struct doq_conn** conn) +{ +#ifdef HAVE_STRUCT_NGTCP2_VERSION_CID + struct ngtcp2_version_cid vc; +#else + uint32_t version; + const uint8_t *dcid, *scid; + size_t dcidlen, scidlen; +#endif + int rv; + +#ifdef HAVE_STRUCT_NGTCP2_VERSION_CID + rv = ngtcp2_pkt_decode_version_cid(&vc, + sldns_buffer_begin(c->doq_socket->pkt_buf), + sldns_buffer_limit(c->doq_socket->pkt_buf), + c->doq_socket->sv_scidlen); +#else + rv = ngtcp2_pkt_decode_version_cid(&version, &dcid, &dcidlen, + &scid, &scidlen, sldns_buffer_begin(c->doq_socket->pkt_buf), + sldns_buffer_limit(c->doq_socket->pkt_buf), c->doq_socket->sv_scidlen); +#endif + if(rv != 0) { + if(rv == NGTCP2_ERR_VERSION_NEGOTIATION) { + /* send the version negotiation */ + doq_send_version_negotiation(c, paddr, +#ifdef HAVE_STRUCT_NGTCP2_VERSION_CID + vc.scid, vc.scidlen, vc.dcid, vc.dcidlen +#else + scid, scidlen, dcid, dcidlen +#endif + ); + return 0; + } + verbose(VERB_ALGO, "doq: could not decode version " + "and CID from QUIC packet header: %s", + ngtcp2_strerror(rv)); + return 0; + } + + if(verbosity >= VERB_ALGO) { + verbose(VERB_ALGO, "ngtcp2_pkt_decode_version_cid packet has " + "QUIC protocol version %u", (unsigned) +#ifdef HAVE_STRUCT_NGTCP2_VERSION_CID + vc. 
+#endif + version + ); + log_hex("dcid", +#ifdef HAVE_STRUCT_NGTCP2_VERSION_CID + (void*)vc.dcid, vc.dcidlen +#else + (void*)dcid, dcidlen +#endif + ); + log_hex("scid", +#ifdef HAVE_STRUCT_NGTCP2_VERSION_CID + (void*)vc.scid, vc.scidlen +#else + (void*)scid, scidlen +#endif + ); + } + *conn = doq_conn_find_by_addr_or_cid(c->doq_socket->table, paddr, +#ifdef HAVE_STRUCT_NGTCP2_VERSION_CID + vc.dcid, vc.dcidlen +#else + dcid, dcidlen +#endif + ); + if(*conn) + (*conn)->doq_socket = c->doq_socket; + return 1; +} + +/** fill cid structure with random data */ +static void doq_cid_randfill(struct ngtcp2_cid* cid, size_t datalen, + struct ub_randstate* rnd) +{ + uint8_t buf[32]; + if(datalen > sizeof(buf)) + datalen = sizeof(buf); + doq_fill_rand(rnd, buf, datalen); + ngtcp2_cid_init(cid, buf, datalen); +} + +/** send retry packet for doq connection. */ +static void +doq_send_retry(struct comm_point* c, struct doq_pkt_addr* paddr, + struct ngtcp2_pkt_hd* hd) +{ + char host[256], port[32]; + struct ngtcp2_cid scid; + uint8_t token[NGTCP2_CRYPTO_MAX_RETRY_TOKENLEN]; + ngtcp2_tstamp ts; + ngtcp2_ssize tokenlen, ret; + + if(!doq_print_addr_port(&paddr->addr, paddr->addrlen, host, + sizeof(host), port, sizeof(port))) { + log_err("doq_send_retry failed"); + return; + } + verbose(VERB_ALGO, "doq: sending retry packet to %s %s", host, port); + + /* the server chosen source connection ID */ + scid.datalen = c->doq_socket->sv_scidlen; + doq_cid_randfill(&scid, scid.datalen, c->doq_socket->rnd); + + ts = doq_get_timestamp_nanosec(); + + tokenlen = ngtcp2_crypto_generate_retry_token(token, + c->doq_socket->static_secret, c->doq_socket->static_secret_len, + hd->version, (void*)&paddr->addr, paddr->addrlen, &scid, + &hd->dcid, ts); + if(tokenlen < 0) { + log_err("ngtcp2_crypto_generate_retry_token failed: %s", + ngtcp2_strerror(tokenlen)); + return; + } + + sldns_buffer_clear(c->doq_socket->pkt_buf); + ret = ngtcp2_crypto_write_retry(sldns_buffer_begin(c->doq_socket->pkt_buf), + 
sldns_buffer_capacity(c->doq_socket->pkt_buf), hd->version, + &hd->scid, &scid, &hd->dcid, token, tokenlen); + if(ret < 0) { + log_err("ngtcp2_crypto_write_retry failed: %s", + ngtcp2_strerror(ret)); + return; + } + sldns_buffer_set_position(c->doq_socket->pkt_buf, ret); + sldns_buffer_flip(c->doq_socket->pkt_buf); + doq_send_pkt(c, paddr, 0); +} + +/** doq send stateless connection close */ +static void +doq_send_stateless_connection_close(struct comm_point* c, + struct doq_pkt_addr* paddr, struct ngtcp2_pkt_hd* hd, + uint64_t error_code) +{ + ngtcp2_ssize ret; + sldns_buffer_clear(c->doq_socket->pkt_buf); + ret = ngtcp2_crypto_write_connection_close( + sldns_buffer_begin(c->doq_socket->pkt_buf), + sldns_buffer_capacity(c->doq_socket->pkt_buf), hd->version, &hd->scid, + &hd->dcid, error_code, NULL, 0); + if(ret < 0) { + log_err("ngtcp2_crypto_write_connection_close failed: %s", + ngtcp2_strerror(ret)); + return; + } + sldns_buffer_set_position(c->doq_socket->pkt_buf, ret); + sldns_buffer_flip(c->doq_socket->pkt_buf); + doq_send_pkt(c, paddr, 0); +} + +/** doq verify retry token, false on failure */ +static int +doq_verify_retry_token(struct comm_point* c, struct doq_pkt_addr* paddr, + struct ngtcp2_cid* ocid, struct ngtcp2_pkt_hd* hd) +{ + char host[256], port[32]; + ngtcp2_tstamp ts; + if(!doq_print_addr_port(&paddr->addr, paddr->addrlen, host, + sizeof(host), port, sizeof(port))) { + log_err("doq_verify_retry_token failed"); + return 0; + } + ts = doq_get_timestamp_nanosec(); + verbose(VERB_ALGO, "doq: verifying retry token from %s %s", host, + port); + if(ngtcp2_crypto_verify_retry_token(ocid, +#ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN + hd->token, hd->tokenlen, +#else + hd->token.base, hd->token.len, +#endif + c->doq_socket->static_secret, + c->doq_socket->static_secret_len, hd->version, + (void*)&paddr->addr, paddr->addrlen, &hd->dcid, + 10*NGTCP2_SECONDS, ts) != 0) { + verbose(VERB_ALGO, "doq: could not verify retry token " + "from %s %s", host, port); + 
return 0; + } + verbose(VERB_ALGO, "doq: verified retry token from %s %s", host, port); + return 1; +} + +/** doq verify token, false on failure */ +static int +doq_verify_token(struct comm_point* c, struct doq_pkt_addr* paddr, + struct ngtcp2_pkt_hd* hd) +{ + char host[256], port[32]; + ngtcp2_tstamp ts; + if(!doq_print_addr_port(&paddr->addr, paddr->addrlen, host, + sizeof(host), port, sizeof(port))) { + log_err("doq_verify_token failed"); + return 0; + } + ts = doq_get_timestamp_nanosec(); + verbose(VERB_ALGO, "doq: verifying token from %s %s", host, port); + if(ngtcp2_crypto_verify_regular_token( +#ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN + hd->token, hd->tokenlen, +#else + hd->token.base, hd->token.len, +#endif + c->doq_socket->static_secret, c->doq_socket->static_secret_len, + (void*)&paddr->addr, paddr->addrlen, 3600*NGTCP2_SECONDS, + ts) != 0) { + verbose(VERB_ALGO, "doq: could not verify token from %s %s", + host, port); + return 0; + } + verbose(VERB_ALGO, "doq: verified token from %s %s", host, port); + return 1; +} + +/** delete and remove from the lookup tree the doq_conn connection */ +static void +doq_delete_connection(struct comm_point* c, struct doq_conn* conn) +{ + struct doq_conn copy; + uint8_t cid[NGTCP2_MAX_CIDLEN]; + rbnode_type* node; + if(!conn) + return; + /* Copy the key and set it deleted. 
*/ + conn->is_deleted = 1; + doq_conn_write_disable(conn); + copy.key = conn->key; + log_assert(conn->key.dcidlen <= NGTCP2_MAX_CIDLEN); + memcpy(cid, conn->key.dcid, conn->key.dcidlen); + copy.key.dcid = cid; + copy.node.key = &copy; + lock_basic_unlock(&conn->lock); + + /* Now get the table lock to delete it from the tree */ + lock_rw_wrlock(&c->doq_socket->table->lock); + node = rbtree_delete(c->doq_socket->table->conn_tree, copy.node.key); + if(node) { + conn = (struct doq_conn*)node->key; + lock_basic_lock(&conn->lock); + doq_conn_write_list_remove(c->doq_socket->table, conn); + if(conn->timer.timer_in_list) { + /* Remove timer from list first, because finding the + * rbnode element of the setlist of same timeouts + * needs tree lookup. Edit the tree structure after + * that lookup. */ + doq_timer_list_remove(c->doq_socket->table, + &conn->timer); + } + if(conn->timer.timer_in_tree) + doq_timer_tree_remove(c->doq_socket->table, + &conn->timer); + } + lock_rw_unlock(&c->doq_socket->table->lock); + if(node) { + lock_basic_unlock(&conn->lock); + doq_table_quic_size_subtract(c->doq_socket->table, + sizeof(*conn)+conn->key.dcidlen); + doq_conn_delete(conn, c->doq_socket->table); + } +} + +/** create and setup a new doq connection, to a new destination, or with + * a new dcid. It has a new set of streams. It is inserted in the lookup tree. + * Returns NULL on failure. 
*/ +static struct doq_conn* +doq_setup_new_conn(struct comm_point* c, struct doq_pkt_addr* paddr, + struct ngtcp2_pkt_hd* hd, struct ngtcp2_cid* ocid) +{ + struct doq_conn* conn; + if(!doq_table_quic_size_available(c->doq_socket->table, + c->doq_socket->cfg, sizeof(*conn)+hd->dcid.datalen + + sizeof(struct doq_stream) + + 100 /* estimated input query */ + + 1200 /* estimated output query */)) { + verbose(VERB_ALGO, "doq: no mem available for new connection"); + doq_send_stateless_connection_close(c, paddr, hd, + NGTCP2_CONNECTION_REFUSED); + return NULL; + } + conn = doq_conn_create(c, paddr, hd->dcid.data, hd->dcid.datalen, + hd->version); + if(!conn) { + log_err("doq: could not allocate doq_conn"); + return NULL; + } + lock_rw_wrlock(&c->doq_socket->table->lock); + lock_basic_lock(&conn->lock); + if(!rbtree_insert(c->doq_socket->table->conn_tree, &conn->node)) { + lock_rw_unlock(&c->doq_socket->table->lock); + log_err("doq: duplicate connection"); + /* conn has no entry in writelist, and no timer yet. */ + lock_basic_unlock(&conn->lock); + doq_conn_delete(conn, c->doq_socket->table); + return NULL; + } + lock_rw_unlock(&c->doq_socket->table->lock); + doq_table_quic_size_add(c->doq_socket->table, + sizeof(*conn)+conn->key.dcidlen); + verbose(VERB_ALGO, "doq: created new connection"); + + /* the scid and dcid switch meaning from the accepted client + * connection to the server connection. The 'source' and 'destination' + * meaning is reversed. 
*/ + if(!doq_conn_setup(conn, hd->scid.data, hd->scid.datalen, + (ocid?ocid->data:NULL), (ocid?ocid->datalen:0), +#ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN + hd->token, hd->tokenlen +#else + hd->token.base, hd->token.len +#endif + )) { + log_err("doq: could not set up connection"); + doq_delete_connection(c, conn); + return NULL; + } + return conn; +} + +/** perform doq address validation */ +static int +doq_address_validation(struct comm_point* c, struct doq_pkt_addr* paddr, + struct ngtcp2_pkt_hd* hd, struct ngtcp2_cid* ocid, + struct ngtcp2_cid** pocid) +{ +#ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN + const uint8_t* token = hd->token; + size_t tokenlen = hd->tokenlen; +#else + const uint8_t* token = hd->token.base; + size_t tokenlen = hd->token.len; +#endif + verbose(VERB_ALGO, "doq stateless address validation"); + + if(tokenlen == 0 || token == NULL) { + doq_send_retry(c, paddr, hd); + return 0; + } + if(token[0] != NGTCP2_CRYPTO_TOKEN_MAGIC_RETRY && + hd->dcid.datalen < NGTCP2_MIN_INITIAL_DCIDLEN) { + doq_send_stateless_connection_close(c, paddr, hd, + NGTCP2_INVALID_TOKEN); + return 0; + } + if(token[0] == NGTCP2_CRYPTO_TOKEN_MAGIC_RETRY) { + if(!doq_verify_retry_token(c, paddr, ocid, hd)) { + doq_send_stateless_connection_close(c, paddr, hd, + NGTCP2_INVALID_TOKEN); + return 0; + } + *pocid = ocid; + } else if(token[0] == NGTCP2_CRYPTO_TOKEN_MAGIC_REGULAR) { + if(!doq_verify_token(c, paddr, hd)) { + doq_send_retry(c, paddr, hd); + return 0; + } +#ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN + hd->token = NULL; + hd->tokenlen = 0; +#else + hd->token.base = NULL; + hd->token.len = 0; +#endif + } else { + verbose(VERB_ALGO, "doq address validation: unrecognised " + "token in hd.token.base with magic byte 0x%2.2x", + (int)token[0]); + if(c->doq_socket->validate_addr) { + doq_send_retry(c, paddr, hd); + return 0; + } +#ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN + hd->token = NULL; + hd->tokenlen = 0; +#else + hd->token.base = NULL; + hd->token.len = 0; +#endif + } + 
return 1; +} + +/** the doq accept, returns false if no further processing of content */ +static int +doq_accept(struct comm_point* c, struct doq_pkt_addr* paddr, + struct doq_conn** conn, struct ngtcp2_pkt_info* pi) +{ + int rv; + struct ngtcp2_pkt_hd hd; + struct ngtcp2_cid ocid, *pocid=NULL; + int err_retry; + memset(&hd, 0, sizeof(hd)); + rv = ngtcp2_accept(&hd, sldns_buffer_begin(c->doq_socket->pkt_buf), + sldns_buffer_limit(c->doq_socket->pkt_buf)); + if(rv != 0) { + if(rv == NGTCP2_ERR_RETRY) { + doq_send_retry(c, paddr, &hd); + return 0; + } + log_err("doq: initial packet failed, ngtcp2_accept failed: %s", + ngtcp2_strerror(rv)); + return 0; + } + if(c->doq_socket->validate_addr || +#ifdef HAVE_STRUCT_NGTCP2_PKT_HD_TOKENLEN + hd.tokenlen +#else + hd.token.len +#endif + ) { + if(!doq_address_validation(c, paddr, &hd, &ocid, &pocid)) + return 0; + } + *conn = doq_setup_new_conn(c, paddr, &hd, pocid); + if(!*conn) + return 0; + (*conn)->doq_socket = c->doq_socket; + if(!doq_conn_recv(c, paddr, *conn, pi, &err_retry, NULL)) { + if(err_retry) + doq_send_retry(c, paddr, &hd); + doq_delete_connection(c, *conn); + *conn = NULL; + return 0; + } + return 1; +} + +/** doq pickup a timer to wait for for the worker. If any timer exists. 
*/ +static void +doq_pickup_timer(struct comm_point* c) +{ + struct doq_timer* t; + struct timeval tv; + int have_time = 0; + memset(&tv, 0, sizeof(tv)); + + lock_rw_wrlock(&c->doq_socket->table->lock); + RBTREE_FOR(t, struct doq_timer*, c->doq_socket->table->timer_tree) { + if(t->worker_doq_socket == NULL || + t->worker_doq_socket == c->doq_socket) { + /* pick up this element */ + t->worker_doq_socket = c->doq_socket; + have_time = 1; + memcpy(&tv, &t->time, sizeof(tv)); + break; + } + } + lock_rw_unlock(&c->doq_socket->table->lock); + + if(have_time) { + struct timeval rel; + timeval_subtract(&rel, &tv, c->doq_socket->now_tv); + comm_timer_set(c->doq_socket->timer, &rel); + memcpy(&c->doq_socket->marked_time, &tv, + sizeof(c->doq_socket->marked_time)); + verbose(VERB_ALGO, "doq pickup timer at %d.%6.6d in %d.%6.6d", + (int)tv.tv_sec, (int)tv.tv_usec, (int)rel.tv_sec, + (int)rel.tv_usec); + } else { + if(comm_timer_is_set(c->doq_socket->timer)) + comm_timer_disable(c->doq_socket->timer); + memset(&c->doq_socket->marked_time, 0, + sizeof(c->doq_socket->marked_time)); + verbose(VERB_ALGO, "doq timer disabled"); + } +} + +/** doq done with connection, release locks and setup timer and write */ +static void +doq_done_setup_timer_and_write(struct comm_point* c, struct doq_conn* conn) +{ + struct doq_conn copy; + uint8_t cid[NGTCP2_MAX_CIDLEN]; + rbnode_type* node; + struct timeval new_tv; + int write_change = 0, timer_change = 0; + + /* No longer in callbacks, so the pointer to doq_socket is back + * to NULL. */ + conn->doq_socket = NULL; + + if(doq_conn_check_timer(conn, &new_tv)) + timer_change = 1; + if( (conn->write_interest && !conn->on_write_list) || + (!conn->write_interest && conn->on_write_list)) + write_change = 1; + + if(!timer_change && !write_change) { + /* Nothing to do. */ + lock_basic_unlock(&conn->lock); + return; + } + + /* The table lock is needed to change the write list and timer tree. 
+ * So the connection lock is released and then the connection is + * looked up again. */ + copy.key = conn->key; + log_assert(conn->key.dcidlen <= NGTCP2_MAX_CIDLEN); + memcpy(cid, conn->key.dcid, conn->key.dcidlen); + copy.key.dcid = cid; + copy.node.key = &copy; + lock_basic_unlock(&conn->lock); + + lock_rw_wrlock(&c->doq_socket->table->lock); + node = rbtree_search(c->doq_socket->table->conn_tree, copy.node.key); + if(!node) { + lock_rw_unlock(&c->doq_socket->table->lock); + /* Must have been deleted in the mean time. */ + return; + } + conn = (struct doq_conn*)node->key; + lock_basic_lock(&conn->lock); + if(conn->is_deleted) { + /* It is deleted now. */ + lock_rw_unlock(&c->doq_socket->table->lock); + lock_basic_unlock(&conn->lock); + return; + } + + if(write_change) { + /* Edit the write lists, we are holding the table.lock and can + * edit the list first,last and also prev,next and on_list + * elements in the doq_conn structures. */ + doq_conn_set_write_list(c->doq_socket->table, conn); + } + if(timer_change) { + doq_timer_set(c->doq_socket->table, &conn->timer, + c->doq_socket, &new_tv); + } + lock_rw_unlock(&c->doq_socket->table->lock); + lock_basic_unlock(&conn->lock); +} + +/** doq done with connection callbacks, release locks and setup write */ +static void +doq_done_with_conn_cb(struct comm_point* c, struct doq_conn* conn) +{ + struct doq_conn copy; + uint8_t cid[NGTCP2_MAX_CIDLEN]; + rbnode_type* node; + + /* no longer in callbacks, so the pointer to doq_socket is back + * to NULL. */ + conn->doq_socket = NULL; + + if( (conn->write_interest && conn->on_write_list) || + (!conn->write_interest && !conn->on_write_list)) { + /* The connection already has the required write list + * status. */ + lock_basic_unlock(&conn->lock); + return; + } + + /* To edit the write list of connections we have to hold the table + * lock, so we release the connection and then look it up again. 
*/ + copy.key = conn->key; + log_assert(conn->key.dcidlen <= NGTCP2_MAX_CIDLEN); + memcpy(cid, conn->key.dcid, conn->key.dcidlen); + copy.key.dcid = cid; + copy.node.key = &copy; + lock_basic_unlock(&conn->lock); + + lock_rw_wrlock(&c->doq_socket->table->lock); + node = rbtree_search(c->doq_socket->table->conn_tree, copy.node.key); + if(!node) { + lock_rw_unlock(&c->doq_socket->table->lock); + /* must have been deleted in the mean time */ + return; + } + conn = (struct doq_conn*)node->key; + lock_basic_lock(&conn->lock); + if(conn->is_deleted) { + /* it is deleted now. */ + lock_rw_unlock(&c->doq_socket->table->lock); + lock_basic_unlock(&conn->lock); + return; + } + + /* edit the write lists, we are holding the table.lock and can + * edit the list first,last and also prev,next and on_list elements + * in the doq_conn structures. */ + doq_conn_set_write_list(c->doq_socket->table, conn); + lock_rw_unlock(&c->doq_socket->table->lock); + lock_basic_unlock(&conn->lock); +} + +/** doq count the length of the write list */ +static size_t +doq_write_list_length(struct comm_point* c) +{ + size_t count = 0; + struct doq_conn* conn; + lock_rw_rdlock(&c->doq_socket->table->lock); + conn = c->doq_socket->table->write_list_first; + while(conn) { + count++; + conn = conn->write_next; + } + lock_rw_unlock(&c->doq_socket->table->lock); + return count; +} + +/** doq pop the first element from the write list to have write events */ +static struct doq_conn* +doq_pop_write_conn(struct comm_point* c) +{ + struct doq_conn* conn; + lock_rw_wrlock(&c->doq_socket->table->lock); + conn = doq_table_pop_first(c->doq_socket->table); + while(conn && conn->is_deleted) { + lock_basic_unlock(&conn->lock); + conn = doq_table_pop_first(c->doq_socket->table); + } + lock_rw_unlock(&c->doq_socket->table->lock); + if(conn) + conn->doq_socket = c->doq_socket; + return conn; +} + +/** doq the connection is done with write callbacks, release it. 
*/ +static void +doq_done_with_write_cb(struct comm_point* c, struct doq_conn* conn, + int delete_it) +{ + if(delete_it) { + doq_delete_connection(c, conn); + return; + } + doq_done_setup_timer_and_write(c, conn); +} + +/** see if the doq socket wants to write packets */ +static int +doq_socket_want_write(struct comm_point* c) +{ + int want_write = 0; + if(c->doq_socket->have_blocked_pkt) + return 1; + lock_rw_rdlock(&c->doq_socket->table->lock); + if(c->doq_socket->table->write_list_first) + want_write = 1; + lock_rw_unlock(&c->doq_socket->table->lock); + return want_write; +} + +/** enable write event for the doq server socket fd */ +static void +doq_socket_write_enable(struct comm_point* c) +{ + verbose(VERB_ALGO, "doq socket want write"); + if(c->doq_socket->event_has_write) + return; + comm_point_listen_for_rw(c, 1, 1); + c->doq_socket->event_has_write = 1; +} + +/** disable write event for the doq server socket fd */ +static void +doq_socket_write_disable(struct comm_point* c) +{ + verbose(VERB_ALGO, "doq socket want no write"); + if(!c->doq_socket->event_has_write) + return; + comm_point_listen_for_rw(c, 1, 0); + c->doq_socket->event_has_write = 0; +} + +/** write blocked packet, if possible. returns false if failed, again. */ +static int +doq_write_blocked_pkt(struct comm_point* c) +{ + struct doq_pkt_addr paddr; + if(!c->doq_socket->have_blocked_pkt) + return 1; + c->doq_socket->have_blocked_pkt = 0; + if(sldns_buffer_limit(c->doq_socket->blocked_pkt) > + sldns_buffer_remaining(c->doq_socket->pkt_buf)) + return 1; /* impossibly large, drop it. + impossible since pkt_buf is same size as blocked_pkt buf. 
*/ + sldns_buffer_clear(c->doq_socket->pkt_buf); + sldns_buffer_write(c->doq_socket->pkt_buf, + sldns_buffer_begin(c->doq_socket->blocked_pkt), + sldns_buffer_limit(c->doq_socket->blocked_pkt)); + sldns_buffer_flip(c->doq_socket->pkt_buf); + memcpy(&paddr, c->doq_socket->blocked_paddr, sizeof(paddr)); + doq_send_pkt(c, &paddr, c->doq_socket->blocked_pkt_pi.ecn); + if(c->doq_socket->have_blocked_pkt) + return 0; + return 1; +} + +/** doq find a timer that timeouted and return the conn, locked. */ +static struct doq_conn* +doq_timer_timeout_conn(struct doq_server_socket* doq_socket) +{ + struct doq_conn* conn = NULL; + struct rbnode_type* node; + lock_rw_wrlock(&doq_socket->table->lock); + node = rbtree_first(doq_socket->table->timer_tree); + if(node && node != RBTREE_NULL) { + struct doq_timer* t = (struct doq_timer*)node; + conn = t->conn; + + /* If now < timer then no further timeouts in tree. */ + if(timeval_smaller(doq_socket->now_tv, &t->time)) { + lock_rw_unlock(&doq_socket->table->lock); + return NULL; + } + + lock_basic_lock(&conn->lock); + conn->doq_socket = doq_socket; + + /* Now that the timer is fired, remove it. */ + doq_timer_unset(doq_socket->table, t); + lock_rw_unlock(&doq_socket->table->lock); + return conn; + } + lock_rw_unlock(&doq_socket->table->lock); + return NULL; +} + +/** doq timer erase the marker that said which timer the worker uses. 
*/ +static void +doq_timer_erase_marker(struct doq_server_socket* doq_socket) +{ + struct doq_timer* t; + lock_rw_wrlock(&doq_socket->table->lock); + t = doq_timer_find_time(doq_socket->table, &doq_socket->marked_time); + if(t && t->worker_doq_socket == doq_socket) + t->worker_doq_socket = NULL; + lock_rw_unlock(&doq_socket->table->lock); + memset(&doq_socket->marked_time, 0, sizeof(doq_socket->marked_time)); +} + +void +doq_timer_cb(void* arg) +{ + struct doq_server_socket* doq_socket = (struct doq_server_socket*)arg; + struct doq_conn* conn; + verbose(VERB_ALGO, "doq timer callback"); + + doq_timer_erase_marker(doq_socket); + + while((conn = doq_timer_timeout_conn(doq_socket)) != NULL) { + if(conn->is_deleted || +#ifdef HAVE_NGTCP2_CONN_IN_CLOSING_PERIOD + ngtcp2_conn_in_closing_period(conn->conn) || +#else + ngtcp2_conn_is_in_closing_period(conn->conn) || +#endif +#ifdef HAVE_NGTCP2_CONN_IN_DRAINING_PERIOD + ngtcp2_conn_in_draining_period(conn->conn) +#else + ngtcp2_conn_is_in_draining_period(conn->conn) +#endif + ) { + if(verbosity >= VERB_ALGO) { + char remotestr[256]; + addr_to_str((void*)&conn->key.paddr.addr, + conn->key.paddr.addrlen, remotestr, + sizeof(remotestr)); + verbose(VERB_ALGO, "doq conn %s is deleted " + "after timeout", remotestr); + } + doq_delete_connection(doq_socket->cp, conn); + continue; + } + if(!doq_conn_handle_timeout(conn)) + doq_delete_connection(doq_socket->cp, conn); + else doq_done_setup_timer_and_write(doq_socket->cp, conn); + } + + if(doq_socket_want_write(doq_socket->cp)) + doq_socket_write_enable(doq_socket->cp); + else doq_socket_write_disable(doq_socket->cp); + doq_pickup_timer(doq_socket->cp); +} + +void +comm_point_doq_callback(int fd, short event, void* arg) +{ + struct comm_point* c; + struct doq_pkt_addr paddr; + int i, pkt_continue, err_drop; + struct doq_conn* conn; + struct ngtcp2_pkt_info pi; + size_t count, num_len; + + c = (struct comm_point*)arg; + log_assert(c->type == comm_doq); + + log_assert(c && 
c->doq_socket->pkt_buf && c->fd == fd); + ub_comm_base_now(c->ev->base); + + /* see if there is a blocked packet, and send that if possible. + * do not attempt to read yet, even if possible, that would just + * push more answers in reply to those read packets onto the list + * of written replies. First attempt to clear the write content out. + * That keeps the memory usage from bloating up. */ + if(c->doq_socket->have_blocked_pkt) { + if(!doq_write_blocked_pkt(c)) { + /* this write has also blocked, attempt to write + * later. Make sure the event listens to write + * events. */ + if(!c->doq_socket->event_has_write) + doq_socket_write_enable(c); + doq_pickup_timer(c); + return; + } + } + + /* see if there is write interest */ + count = 0; + num_len = doq_write_list_length(c); + while((conn = doq_pop_write_conn(c)) != NULL) { + if(conn->is_deleted || +#ifdef HAVE_NGTCP2_CONN_IN_CLOSING_PERIOD + ngtcp2_conn_in_closing_period(conn->conn) || +#else + ngtcp2_conn_is_in_closing_period(conn->conn) || +#endif +#ifdef HAVE_NGTCP2_CONN_IN_DRAINING_PERIOD + ngtcp2_conn_in_draining_period(conn->conn) +#else + ngtcp2_conn_is_in_draining_period(conn->conn) +#endif + ) { + conn->doq_socket = NULL; + lock_basic_unlock(&conn->lock); + if(c->doq_socket->have_blocked_pkt) { + if(!c->doq_socket->event_has_write) + doq_socket_write_enable(c); + doq_pickup_timer(c); + return; + } + if(++count > num_len*2) + break; + continue; + } + if(verbosity >= VERB_ALGO) { + char remotestr[256]; + addr_to_str((void*)&conn->key.paddr.addr, + conn->key.paddr.addrlen, remotestr, + sizeof(remotestr)); + verbose(VERB_ALGO, "doq write connection %s %d", + remotestr, doq_sockaddr_get_port( + &conn->key.paddr.addr)); + } + if(doq_conn_write_streams(c, conn, &err_drop)) + err_drop = 0; + doq_done_with_write_cb(c, conn, err_drop); + if(c->doq_socket->have_blocked_pkt) { + if(!c->doq_socket->event_has_write) + doq_socket_write_enable(c); + doq_pickup_timer(c); + return; + } + /* Stop overly long write lists 
that are created + * while we are processing. Do those next time there + * is a write callback. Stops long loops, and keeps + * fair for other events. */ + if(++count > num_len*2) + break; + } + + /* check for data to read */ + if((event&UB_EV_READ)!=0) + for(i=0; i<NUM_UDP_PER_SELECT; i++) { + if(c->doq_socket->have_blocked_pkt) { + if(!c->doq_socket->event_has_write) + doq_socket_write_enable(c); + doq_pickup_timer(c); + return; + } + sldns_buffer_clear(c->doq_socket->pkt_buf); + doq_pkt_addr_init(&paddr); + log_assert(fd != -1); + log_assert(sldns_buffer_remaining(c->doq_socket->pkt_buf) > 0); + if(!doq_recv(c, &paddr, &pkt_continue, &pi)) { + if(pkt_continue) + continue; + break; + } + + /* handle incoming packet from remote addr to localaddr */ + if(verbosity >= VERB_ALGO) { + char remotestr[256], localstr[256]; + addr_to_str((void*)&paddr.addr, paddr.addrlen, + remotestr, sizeof(remotestr)); + addr_to_str((void*)&paddr.localaddr, + paddr.localaddrlen, localstr, + sizeof(localstr)); + log_info("incoming doq packet from %s port %d on " + "%s port %d ifindex %d", + remotestr, doq_sockaddr_get_port(&paddr.addr), + localstr, + doq_sockaddr_get_port(&paddr.localaddr), + paddr.ifindex); + log_info("doq_recv length %d ecn 0x%x", + (int)sldns_buffer_limit(c->doq_socket->pkt_buf), + (int)pi.ecn); + } + + if(sldns_buffer_limit(c->doq_socket->pkt_buf) == 0) + continue; + + conn = NULL; + if(!doq_decode_pkt_header_negotiate(c, &paddr, &conn)) + continue; + if(!conn) { + if(!doq_accept(c, &paddr, &conn, &pi)) + continue; + if(!doq_conn_write_streams(c, conn, NULL)) { + doq_delete_connection(c, conn); + continue; + } + doq_done_setup_timer_and_write(c, conn); + continue; + } + if( +#ifdef HAVE_NGTCP2_CONN_IN_CLOSING_PERIOD + ngtcp2_conn_in_closing_period(conn->conn) +#else + ngtcp2_conn_is_in_closing_period(conn->conn) +#endif + ) { + if(!doq_conn_send_close(c, conn)) { + doq_delete_connection(c, conn); + } else { + doq_done_setup_timer_and_write(c, conn); + } + continue; + } + if( +#ifdef 
HAVE_NGTCP2_CONN_IN_DRAINING_PERIOD + ngtcp2_conn_in_draining_period(conn->conn) +#else + ngtcp2_conn_is_in_draining_period(conn->conn) +#endif + ) { + doq_done_setup_timer_and_write(c, conn); + continue; + } + if(!doq_conn_recv(c, &paddr, conn, &pi, NULL, &err_drop)) { + /* The receive failed, and if it also failed to send + * a close, drop the connection. That means it is not + * in the closing period. */ + if(err_drop) { + doq_delete_connection(c, conn); + } else { + doq_done_setup_timer_and_write(c, conn); + } + continue; + } + if(!doq_conn_write_streams(c, conn, &err_drop)) { + if(err_drop) { + doq_delete_connection(c, conn); + } else { + doq_done_setup_timer_and_write(c, conn); + } + continue; + } + doq_done_setup_timer_and_write(c, conn); + } + + /* see if we want to have more write events */ + verbose(VERB_ALGO, "doq check write enable"); + if(doq_socket_want_write(c)) + doq_socket_write_enable(c); + else doq_socket_write_disable(c); + doq_pickup_timer(c); +} + +/** create new doq server socket structure */ +static struct doq_server_socket* +doq_server_socket_create(struct doq_table* table, struct ub_randstate* rnd, + const char* ssl_service_key, const char* ssl_service_pem, + struct comm_point* c, struct comm_base* base, struct config_file* cfg) +{ + size_t doq_buffer_size = 4096; /* bytes buffer size, for one packet. 
*/ + struct doq_server_socket* doq_socket; + doq_socket = calloc(1, sizeof(*doq_socket)); + if(!doq_socket) { + return NULL; + } + doq_socket->table = table; + doq_socket->rnd = rnd; + doq_socket->validate_addr = 1; + if(ssl_service_key == NULL || ssl_service_key[0]==0) { + log_err("doq server socket create: no tls-service-key"); + free(doq_socket); + return NULL; + } + if(ssl_service_pem == NULL || ssl_service_pem[0]==0) { + log_err("doq server socket create: no tls-service-pem"); + free(doq_socket); + return NULL; + } + doq_socket->ssl_service_key = strdup(ssl_service_key); + if(!doq_socket->ssl_service_key) { + free(doq_socket); + return NULL; + } + doq_socket->ssl_service_pem = strdup(ssl_service_pem); + if(!doq_socket->ssl_service_pem) { + free(doq_socket->ssl_service_key); + free(doq_socket); + return NULL; + } + doq_socket->ssl_verify_pem = NULL; + /* the doq_socket has its own copy of the static secret, as + * well as other config values, so that they do not need table.lock */ + doq_socket->static_secret_len = table->static_secret_len; + doq_socket->static_secret = memdup(table->static_secret, + table->static_secret_len); + if(!doq_socket->static_secret) { + free(doq_socket->ssl_service_key); + free(doq_socket->ssl_service_pem); + free(doq_socket->ssl_verify_pem); + free(doq_socket); + return NULL; + } + if(!doq_socket_setup_ctx(doq_socket)) { + free(doq_socket->ssl_service_key); + free(doq_socket->ssl_service_pem); + free(doq_socket->ssl_verify_pem); + free(doq_socket->static_secret); + free(doq_socket); + return NULL; + } + doq_socket->idle_timeout = table->idle_timeout; + doq_socket->sv_scidlen = table->sv_scidlen; + doq_socket->cp = c; + doq_socket->pkt_buf = sldns_buffer_new(doq_buffer_size); + if(!doq_socket->pkt_buf) { + free(doq_socket->ssl_service_key); + free(doq_socket->ssl_service_pem); + free(doq_socket->ssl_verify_pem); + free(doq_socket->static_secret); + SSL_CTX_free(doq_socket->ctx); + free(doq_socket); + return NULL; + } + 
doq_socket->blocked_pkt = sldns_buffer_new( + sldns_buffer_capacity(doq_socket->pkt_buf)); + if(!doq_socket->blocked_pkt) { + /* the blocked_pkt buffer could not be allocated; release + * everything set up so far, including the pkt_buf that was + * allocated before it. */ + free(doq_socket->ssl_service_key); + free(doq_socket->ssl_service_pem); + free(doq_socket->ssl_verify_pem); + free(doq_socket->static_secret); + SSL_CTX_free(doq_socket->ctx); + sldns_buffer_free(doq_socket->pkt_buf); + free(doq_socket); + return NULL; + } + doq_socket->blocked_paddr = calloc(1, + sizeof(*doq_socket->blocked_paddr)); + if(!doq_socket->blocked_paddr) { + free(doq_socket->ssl_service_key); + free(doq_socket->ssl_service_pem); + free(doq_socket->ssl_verify_pem); + free(doq_socket->static_secret); + SSL_CTX_free(doq_socket->ctx); + sldns_buffer_free(doq_socket->pkt_buf); + sldns_buffer_free(doq_socket->blocked_pkt); + free(doq_socket); + return NULL; + } + doq_socket->timer = comm_timer_create(base, doq_timer_cb, doq_socket); + if(!doq_socket->timer) { + free(doq_socket->ssl_service_key); + free(doq_socket->ssl_service_pem); + free(doq_socket->ssl_verify_pem); + free(doq_socket->static_secret); + SSL_CTX_free(doq_socket->ctx); + sldns_buffer_free(doq_socket->pkt_buf); + sldns_buffer_free(doq_socket->blocked_pkt); + free(doq_socket->blocked_paddr); + free(doq_socket); + return NULL; + } + memset(&doq_socket->marked_time, 0, sizeof(doq_socket->marked_time)); + comm_base_timept(base, &doq_socket->now_tt, &doq_socket->now_tv); + doq_socket->cfg = cfg; + return doq_socket; +} + +/** delete doq server socket structure */ +static void +doq_server_socket_delete(struct doq_server_socket* doq_socket) +{ + if(!doq_socket) + return; + free(doq_socket->static_secret); + SSL_CTX_free(doq_socket->ctx); +#ifndef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT + free(doq_socket->quic_method); +#endif + free(doq_socket->ssl_service_key); + free(doq_socket->ssl_service_pem); + free(doq_socket->ssl_verify_pem); + sldns_buffer_free(doq_socket->pkt_buf); + sldns_buffer_free(doq_socket->blocked_pkt); + free(doq_socket->blocked_paddr); + 
comm_timer_delete(doq_socket->timer); + free(doq_socket); +} + +/** find repinfo in the doq table */ +static struct doq_conn* +doq_lookup_repinfo(struct doq_table* table, struct comm_reply* repinfo) +{ + struct doq_conn* conn; + struct doq_conn_key key; + doq_conn_key_from_repinfo(&key, repinfo); + lock_rw_rdlock(&table->lock); + conn = doq_conn_find(table, &key.paddr.addr, + key.paddr.addrlen, &key.paddr.localaddr, + key.paddr.localaddrlen, key.paddr.ifindex, key.dcid, + key.dcidlen); + if(conn) { + lock_basic_lock(&conn->lock); + lock_rw_unlock(&table->lock); + return conn; + } + lock_rw_unlock(&table->lock); + return NULL; +} + +/** doq find connection and stream. From inside callbacks from worker. */ +static int +doq_lookup_conn_stream(struct comm_reply* repinfo, struct comm_point* c, + struct doq_conn** conn, struct doq_stream** stream) +{ + if(c->doq_socket->current_conn) { + *conn = c->doq_socket->current_conn; + } else { + *conn = doq_lookup_repinfo(c->doq_socket->table, repinfo); + if((*conn) && (*conn)->is_deleted) { + lock_basic_unlock(&(*conn)->lock); + *conn = NULL; + } + if(*conn) { + (*conn)->doq_socket = c->doq_socket; + } + } + if(!*conn) { + *stream = NULL; + return 0; + } + *stream = doq_stream_find(*conn, repinfo->doq_streamid); + if(!*stream) { + if(!c->doq_socket->current_conn) { + /* Not inside callbacks, we have our own lock on conn. + * Release it. */ + lock_basic_unlock(&(*conn)->lock); + } + return 0; + } + if((*stream)->is_closed) { + /* stream is closed, ignore reply or drop */ + if(!c->doq_socket->current_conn) { + /* Not inside callbacks, we have our own lock on conn. + * Release it. 
*/ + lock_basic_unlock(&(*conn)->lock); + } + return 0; + } + return 1; +} + +/** doq send a reply from a comm reply */ +static void +doq_socket_send_reply(struct comm_reply* repinfo) +{ + struct doq_conn* conn; + struct doq_stream* stream; + log_assert(repinfo->c->type == comm_doq); + if(!doq_lookup_conn_stream(repinfo, repinfo->c, &conn, &stream)) { + verbose(VERB_ALGO, "doq: send_reply but %s is gone", + (conn?"stream":"connection")); + /* No stream, it may have been closed. */ + /* Drop the reply, it cannot be sent. */ + return; + } + if(!doq_stream_send_reply(conn, stream, repinfo->c->buffer)) + doq_stream_close(conn, stream, 1); + if(!repinfo->c->doq_socket->current_conn) { + /* Not inside callbacks, we have our own lock on conn. + * Release it. */ + doq_done_with_conn_cb(repinfo->c, conn); + /* since we sent a reply, or closed it, the assumption is + * that there is something to write, so enable write event. + * It waits until the write event happens to write the + * streams with answers, this allows some answers to be + * answered before the event loop reaches the doq fd, in + * repinfo->c->fd, and that collates answers. That would + * not happen if we write doq packets right now. */ + doq_socket_write_enable(repinfo->c); + } +} + +/** doq drop a reply from a comm reply */ +static void +doq_socket_drop_reply(struct comm_reply* repinfo) +{ + struct doq_conn* conn; + struct doq_stream* stream; + log_assert(repinfo->c->type == comm_doq); + if(!doq_lookup_conn_stream(repinfo, repinfo->c, &conn, &stream)) { + verbose(VERB_ALGO, "doq: drop_reply but %s is gone", + (conn?"stream":"connection")); + /* The connection or stream is already gone. */ + return; + } + doq_stream_close(conn, stream, 1); + if(!repinfo->c->doq_socket->current_conn) { + /* Not inside callbacks, we have our own lock on conn. + * Release it. 
*/ + doq_done_with_conn_cb(repinfo->c, conn); + doq_socket_write_enable(repinfo->c); + } +} +#endif /* HAVE_NGTCP2 */ + int adjusted_tcp_timeout(struct comm_point* c) { if(c->tcp_timeout_msec < TCP_QUERY_TIMEOUT_MINIMUM) @@ -4081,6 +5857,96 @@ comm_point_create_udp_ancil(struct comm_base *base, int fd, } #endif +struct comm_point* +comm_point_create_doq(struct comm_base *base, int fd, sldns_buffer* buffer, + comm_point_callback_type* callback, void* callback_arg, + struct unbound_socket* socket, struct doq_table* table, + struct ub_randstate* rnd, const char* ssl_service_key, + const char* ssl_service_pem, struct config_file* cfg) +{ +#ifdef HAVE_NGTCP2 + struct comm_point* c = (struct comm_point*)calloc(1, + sizeof(struct comm_point)); + short evbits; + if(!c) + return NULL; + c->ev = (struct internal_event*)calloc(1, + sizeof(struct internal_event)); + if(!c->ev) { + free(c); + return NULL; + } + c->ev->base = base; + c->fd = fd; + c->buffer = buffer; + c->timeout = NULL; + c->tcp_is_reading = 0; + c->tcp_byte_count = 0; + c->tcp_parent = NULL; + c->max_tcp_count = 0; + c->cur_tcp_count = 0; + c->tcp_handlers = NULL; + c->tcp_free = NULL; + c->type = comm_doq; + c->tcp_do_close = 0; + c->do_not_close = 0; + c->tcp_do_toggle_rw = 0; + c->tcp_check_nb_connect = 0; +#ifdef USE_MSG_FASTOPEN + c->tcp_do_fastopen = 0; +#endif +#ifdef USE_DNSCRYPT + c->dnscrypt = 0; + c->dnscrypt_buffer = NULL; +#endif +#ifdef HAVE_NGTCP2 /* NOTE(review): nested inside the HAVE_NGTCP2 branch of this function, so always true here */ + c->doq_socket = doq_server_socket_create(table, rnd, ssl_service_key, + ssl_service_pem, c, base, cfg); + if(!c->doq_socket) { + log_err("could not create doq comm_point"); + comm_point_delete(c); + return NULL; + } +#endif + c->inuse = 0; + c->callback = callback; + c->cb_arg = callback_arg; + c->socket = socket; + c->pp2_enabled = 0; + c->pp2_header_state = pp2_header_none; + evbits = UB_EV_READ | UB_EV_PERSIST; + /* ub_event stuff: a persistent read event on the fd, that calls comm_point_doq_callback */ + c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits, + comm_point_doq_callback, c); + if(c->ev->ev == NULL) { 
+ log_err("could not baseset udp event"); + comm_point_delete(c); + return NULL; + } + if(fd!=-1 && ub_event_add(c->ev->ev, c->timeout) != 0 ) { + log_err("could not add udp event"); + comm_point_delete(c); + return NULL; + } + c->event_added = 1; + return c; +#else + /* no libngtcp2, so no QUIC support. The fd is closed and NULL is returned. */ + (void)base; + (void)buffer; + (void)callback; + (void)callback_arg; + (void)socket; + (void)rnd; + (void)table; + (void)ssl_service_key; + (void)ssl_service_pem; + (void)cfg; + sock_close(fd); + return NULL; +#endif /* HAVE_NGTCP2 */ +} + static struct comm_point* comm_point_create_tcp_handler(struct comm_base *base, struct comm_point* parent, size_t bufsize, @@ -4749,6 +6615,10 @@ comm_point_delete(struct comm_point* c) http2_session_delete(c->h2_session); } } +#ifdef HAVE_NGTCP2 + if(c->doq_socket) + doq_server_socket_delete(c->doq_socket); +#endif ub_event_free(c->ev->ev); free(c->ev); free(c); @@ -4811,6 +6681,10 @@ comm_point_send_reply(struct comm_reply *repinfo) comm_point_start_listening(repinfo->c, -1, adjusted_tcp_timeout(repinfo->c)); return; +#ifdef HAVE_NGTCP2 + } else if(repinfo->c->doq_socket) { + doq_socket_send_reply(repinfo); +#endif } else { comm_point_start_listening(repinfo->c, -1, adjusted_tcp_timeout(repinfo->c)); @@ -4838,6 +6712,11 @@ comm_point_drop_reply(struct comm_reply* repinfo) } reclaim_http_handler(repinfo->c); return; +#ifdef HAVE_NGTCP2 + } else if(repinfo->c->type == comm_doq) { + doq_socket_drop_reply(repinfo); + return; +#endif } reclaim_tcp_handler(repinfo->c); } diff --git a/util/netevent.h b/util/netevent.h index 6f43ce56c..acc4887b1 100644 --- a/util/netevent.h +++ b/util/netevent.h @@ -65,6 +65,9 @@ #ifdef HAVE_NGHTTP2_NGHTTP2_H #include <nghttp2/nghttp2.h> #endif +#ifdef HAVE_NGTCP2 +#include <ngtcp2/ngtcp2.h> +#endif struct sldns_buffer; struct comm_point; @@ -72,6 +75,11 @@ struct comm_reply; struct tcl_list; struct ub_event_base; struct unbound_socket; +struct doq_server_socket; +struct doq_table; +struct doq_conn; +struct config_file; +struct
ub_randstate; struct mesh_state; struct mesh_area; @@ -105,6 +113,8 @@ typedef int comm_point_callback_type(struct comm_point*, void*, int, #define NETEVENT_SLOW_ACCEPT_TIME 2000 /** timeout to slow down log print, so it does not spam the logs, in sec */ #define SLOW_LOG_TIME 10 +/** for doq, the size of the array that stores a dcid. The maximum dcid length in ngtcp2 is 20, so this leaves some room. */ +#define DOQ_MAX_CIDLEN 24 /** * A communication point dispatcher. Thread specific. @@ -164,6 +174,19 @@ struct comm_reply { struct sockaddr_storage client_addr; /** the original address length */ socklen_t client_addrlen; +#ifdef HAVE_NGTCP2 + /** the doq ifindex, together with addr and localaddr in pktinfo, + * and dcid makes the doq_conn_key to find the connection */ + int doq_ifindex; + /** the doq dcid, the connection id used to find the connection */ + uint8_t doq_dcid[DOQ_MAX_CIDLEN]; + /** the length of the doq dcid */ + size_t doq_dcidlen; + /** the doq stream id of the stream that the query came in on */ + int64_t doq_streamid; + /** port number for doq */ + int doq_srcport; +#endif /* HAVE_NGTCP2 */ }; /** @@ -266,6 +289,11 @@ struct comm_point { /** maximum number of HTTP/2 streams per connection. Send in HTTP/2 * SETTINGS frame. */ uint32_t http2_max_streams; + /* -------- DoQ ------- */ +#ifdef HAVE_NGTCP2 + /** the doq server socket, with list of doq connections */ + struct doq_server_socket* doq_socket; +#endif /* -------- dnstap ------- */ /** the dnstap environment */ @@ -281,6 +309,8 @@ struct comm_point { comm_tcp, /** HTTP handler socket */ comm_http, + /** DOQ handler socket */ + comm_doq, /** AF_UNIX socket - for internal commands. */ comm_local, /** raw - not DNS format - for pipe readers and writers */ @@ -552,6 +582,30 @@ struct comm_point* comm_point_create_udp_ancil(struct comm_base* base, int fd, struct sldns_buffer* buffer, int pp2_enabled, comm_point_callback_type* callback, void* callback_arg, struct unbound_socket* socket); +/** + * Create a UDP comm point for DoQ. Calls malloc. 
+ * Sets up the structure with the parameters you provide. + * @param base: in which base to alloc the commpoint. + * @param fd: file descriptor of open UDP socket. + * @param buffer: shared buffer by UDP sockets from this thread. + * @param callback: callback function pointer. + * @param callback_arg: will be passed to your callback function. + * @param socket: the opened socket properties, that will be passed to your callback function. + * @param table: the doq connection table for the host. + * @param rnd: random generator to use. + * @param ssl_service_key: the ssl service key file. + * @param ssl_service_pem: the ssl service pem file. + * @param cfg: config file struct. + * @return: returns the allocated communication point. NULL on error. When compiled without libngtcp2, the fd is closed and NULL is returned. + * Sets timeout to NULL. Turns off TCP options. + */ +struct comm_point* comm_point_create_doq(struct comm_base* base, + int fd, struct sldns_buffer* buffer, + comm_point_callback_type* callback, void* callback_arg, + struct unbound_socket* socket, struct doq_table* table, + struct ub_randstate* rnd, const char* ssl_service_key, + const char* ssl_service_pem, struct config_file* cfg); + /** * Create a TCP listener comm point. Calls malloc. * Setups the structure with the parameters you provide. @@ -821,6 +875,16 @@ void comm_point_udp_callback(int fd, short event, void* arg); */ void comm_point_udp_ancil_callback(int fd, short event, void* arg); +/** + * This routine is published for checks and tests, and is only used internally. + * Handles the libevent callback for the doq comm point. + * @param fd: file descriptor. + * @param event: event bits from libevent: + * EV_READ, EV_WRITE, EV_SIGNAL, EV_TIMEOUT. + * @param arg: the comm_point structure. + */ +void comm_point_doq_callback(int fd, short event, void* arg); + /** * This routine is published for checks and tests, and is only used internally. 
* handle libevent callback for tcp accept comm point @@ -958,6 +1022,106 @@ void http2_stream_add_meshstate(struct http2_stream* h2_stream, /** Remove mesh state from stream. When the mesh state has been removed. */ void http2_stream_remove_mesh_state(struct http2_stream* h2_stream); +/** + * DoQ socket address storage for an IPv4 or IPv6 address. Smaller than + * the sockaddr_storage because not with af_unix pathnames. + */ +struct doq_addr_storage { + union { + struct sockaddr_in in; +#ifdef AF_INET6 + struct sockaddr_in6 in6; +#endif + } sockaddr; +}; + +/** + * The DoQ server socket information, for DNS over QUIC. + */ +struct doq_server_socket { + /** the doq connection table */ + struct doq_table* table; + /** random generator */ + struct ub_randstate* rnd; + /** if address validation is enabled */ + uint8_t validate_addr; + /** the ssl service key file */ + char* ssl_service_key; + /** the ssl service pem file */ + char* ssl_service_pem; + /** the ssl verify pem file */ + char* ssl_verify_pem; + /** the server scid length */ + int sv_scidlen; + /** the idle timeout in nanoseconds */ + uint64_t idle_timeout; + /** the static secret for the server */ + uint8_t* static_secret; + /** length of the static secret */ + size_t static_secret_len; + /** ssl context, SSL_CTX* */ + void* ctx; +#ifndef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT + /** quic method functions, SSL_QUIC_METHOD* */ + void* quic_method; +#endif + /** the comm point for this doq server socket */ + struct comm_point* cp; + /** the buffer for packets, doq in and out */ + struct sldns_buffer* pkt_buf; + /** the current doq connection when we are in callbacks to worker, + * so that we have the already locked structure at our disposal. It is NULL when not inside callbacks. 
*/ + struct doq_conn* current_conn; + /** if the callback event on the fd has write flags */ + uint8_t event_has_write; + /** if there is a blocked packet in the blocked_pkt buffer */ + int have_blocked_pkt; + /** store blocked packet, a packet that could not be sent on the + * nonblocking socket. It has to be sent later, when the write on + * the udp socket unblocks. */ + struct sldns_buffer* blocked_pkt; +#ifdef HAVE_NGTCP2 + /** the ecn info for the blocked packet, congestion information. */ + struct ngtcp2_pkt_info blocked_pkt_pi; +#endif + /** the packet destination for the blocked packet. */ + struct doq_pkt_addr* blocked_paddr; + /** timer for this worker on this comm_point to wait on. */ + struct comm_timer* timer; + /** the timer that is marked by the doq_socket as waited on. */ + struct timeval marked_time; + /** the current time for use by time functions, time_t. */ + time_t* now_tt; + /** the current time for use by time functions, timeval. */ + struct timeval* now_tv; + /** config file for the worker. */ + struct config_file* cfg; +}; + +/** + * DoQ packet address information. From pktinfo, stores local and remote + * address and ifindex, so the packet can be sent there. + */ +struct doq_pkt_addr { + /** the remote addr, and local addr */ + struct doq_addr_storage addr, localaddr; + /** length of addr and length of localaddr */ + socklen_t addrlen, localaddrlen; + /** interface index from pktinfo ancillary information */ + int ifindex; +}; + +/** Initialize the pkt addr with lengths set to sizeof. That is ready for + * a call to recv. */ +void doq_pkt_addr_init(struct doq_pkt_addr* paddr); + +/** send doq packet over UDP. */ +void doq_send_pkt(struct comm_point* c, struct doq_pkt_addr* paddr, + uint32_t ecn); + +/** doq timer callback function. */ +void doq_timer_cb(void* arg); + /** * This routine is published for checks and tests, and is only used internally. * handle libevent callback for timer comm.