From d0f687d30f4c194d22fd238ee72aebe19d0f7be9 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Sun, 10 Feb 2019 13:31:08 +0000 Subject: [PATCH 01/93] Fix multiple warnings in usr.bin/top about discarded qualifiers from both clang and gcc, by either constifying variables, or when that is not possible, using __DECONST. MFC after: 1 week --- usr.bin/top/Makefile | 7 +------ usr.bin/top/display.c | 8 ++++---- usr.bin/top/display.h | 2 +- usr.bin/top/machine.c | 4 ++-- usr.bin/top/top.c | 4 ++-- usr.bin/top/utils.c | 2 +- usr.bin/top/utils.h | 2 +- 7 files changed, 12 insertions(+), 17 deletions(-) diff --git a/usr.bin/top/Makefile b/usr.bin/top/Makefile index d778e07c8c1..148fd96c191 100644 --- a/usr.bin/top/Makefile +++ b/usr.bin/top/Makefile @@ -7,14 +7,9 @@ SRCS= commands.c display.c machine.c screen.c top.c \ username.c utils.c MAN= top.1 -.if ${COMPILER_TYPE} == "gcc" -.if ${COMPILER_VERSION} >= 50000 -CFLAGS.gcc=-Wno-error=discarded-qualifiers -Wno-error=incompatible-pointer-types -.else #base gcc +.if ${COMPILER_TYPE} == "gcc" && ${COMPILER_VERSION} < 50000 NO_WERROR= .endif -.endif -CFLAGS.clang=-Wno-error=incompatible-pointer-types-discards-qualifiers LIBADD= ncursesw m kvm jail util sbuf .include diff --git a/usr.bin/top/display.c b/usr.bin/top/display.c index 862d2a944c8..03277ea86cf 100644 --- a/usr.bin/top/display.c +++ b/usr.bin/top/display.c @@ -184,7 +184,7 @@ int display_init(struct statics * statics) { int lines; - char **pp; + const char * const *pp; int *ip; int i; @@ -516,8 +516,8 @@ void z_cpustates(void) { int i = 0; - const char **names; - char *thisname; + const char * const *names; + const char *thisname; int cpu, value; for (cpu = 0; cpu < num_cpus; cpu++) { @@ -751,7 +751,7 @@ static int header_length; * allocated area with the trimmed header. 
*/ -const char * +char * trim_header(const char *text) { char *s; diff --git a/usr.bin/top/display.h b/usr.bin/top/display.h index 5b5aa32fe22..546f21ce34b 100644 --- a/usr.bin/top/display.h +++ b/usr.bin/top/display.h @@ -27,7 +27,7 @@ void i_timeofday(time_t *tod); void i_uptime(struct timeval *bt, time_t *tod); void new_message(int type, const char *msgfmt, ...); int readline(char *buffer, int size, int numeric); -const char *trim_header(const char *text); +char *trim_header(const char *text); void u_arc(int *stats); void u_carc(int *stats); void u_cpustates(int *states); diff --git a/usr.bin/top/machine.c b/usr.bin/top/machine.c index 374c9da0edf..563efc624e2 100644 --- a/usr.bin/top/machine.c +++ b/usr.bin/top/machine.c @@ -618,7 +618,7 @@ get_old_proc(struct kinfo_proc *pp) pp->ki_udata = NOPROC; return (NULL); } - pp->ki_udata = oldp; + pp->ki_udata = __DECONST(void *, oldp); return (oldp); } @@ -634,7 +634,7 @@ get_io_stats(const struct kinfo_proc *pp, long *inp, long *oup, long *flp, static struct kinfo_proc dummy; long ret; - oldp = get_old_proc(pp); + oldp = get_old_proc(__DECONST(struct kinfo_proc *, pp)); if (oldp == NULL) { memset(&dummy, 0, sizeof(dummy)); oldp = &dummy; diff --git a/usr.bin/top/top.c b/usr.bin/top/top.c index 80fa446fc7b..650789689b6 100644 --- a/usr.bin/top/top.c +++ b/usr.bin/top/top.c @@ -219,7 +219,7 @@ end: } int -main(int argc, char *argv[]) +main(int argc, const char *argv[]) { int i; int active_procs; @@ -306,7 +306,7 @@ main(int argc, char *argv[]) optind = 1; } - while ((i = getopt_long(ac, av, "CSIHPabijJ:nquvzs:d:U:m:o:p:Ttw", longopts, NULL)) != EOF) + while ((i = getopt_long(ac, __DECONST(char * const *, av), "CSIHPabijJ:nquvzs:d:U:m:o:p:Ttw", longopts, NULL)) != EOF) { switch(i) { diff --git a/usr.bin/top/utils.c b/usr.bin/top/utils.c index 93bbcad09dc..1ac0d6ad151 100644 --- a/usr.bin/top/utils.c +++ b/usr.bin/top/utils.c @@ -146,7 +146,7 @@ string_index(const char *string, const char * const *array) * squat about 
quotes. */ -const char * const * +const char ** argparse(char *line, int *cntp) { const char **ap; diff --git a/usr.bin/top/utils.h b/usr.bin/top/utils.h index 106e1da0896..2688f551857 100644 --- a/usr.bin/top/utils.h +++ b/usr.bin/top/utils.h @@ -16,7 +16,7 @@ int atoiwi(const char *); char *itoa(unsigned int); char *itoa7(int); int digits(int); -const char * const *argparse(char *, int *); +const char **argparse(char *, int *); long percentages(int, int *, long *, long *, long *); const char *format_time(long); char *format_k(int64_t); From 2f301637c8b662c3aa9e698db39095d0a62729de Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Sun, 10 Feb 2019 13:34:21 +0000 Subject: [PATCH 02/93] Fix multiple warnings in usr.bin/top about variables shadowing global declarations from base gcc, by renaming those variables. MFC after: 1 week --- usr.bin/top/Makefile | 4 ---- usr.bin/top/username.c | 6 +++--- usr.bin/top/utils.c | 6 +++--- 3 files changed, 6 insertions(+), 10 deletions(-) diff --git a/usr.bin/top/Makefile b/usr.bin/top/Makefile index 148fd96c191..d11b91273b5 100644 --- a/usr.bin/top/Makefile +++ b/usr.bin/top/Makefile @@ -7,9 +7,5 @@ SRCS= commands.c display.c machine.c screen.c top.c \ username.c utils.c MAN= top.1 -.if ${COMPILER_TYPE} == "gcc" && ${COMPILER_VERSION} < 50000 -NO_WERROR= -.endif - LIBADD= ncursesw m kvm jail util sbuf .include diff --git a/usr.bin/top/username.c b/usr.bin/top/username.c index 3b15b6a2231..be2efa8f213 100644 --- a/usr.bin/top/username.c +++ b/usr.bin/top/username.c @@ -70,7 +70,7 @@ username(int uid) } int -userid(char username[]) +userid(char username_[]) { struct passwd *pwd; @@ -78,13 +78,13 @@ userid(char username[]) but for now we just do it simply and remember just the result. 
*/ - if ((pwd = getpwnam(username)) == NULL) + if ((pwd = getpwnam(username_)) == NULL) { return(-1); } /* enter the result in the hash table */ - enter_user(pwd->pw_uid, username, 1); + enter_user(pwd->pw_uid, username_, 1); /* return our result */ return(pwd->pw_uid); diff --git a/usr.bin/top/utils.c b/usr.bin/top/utils.c index 1ac0d6ad151..b3d4b15be28 100644 --- a/usr.bin/top/utils.c +++ b/usr.bin/top/utils.c @@ -292,11 +292,11 @@ char * format_k(int64_t amt) { static char retarray[NUM_STRINGS][16]; - static int index = 0; + static int index_ = 0; char *ret; - ret = retarray[index]; - index = (index + 1) % NUM_STRINGS; + ret = retarray[index_]; + index_ = (index_ + 1) % NUM_STRINGS; humanize_number(ret, 6, amt * 1024, "", HN_AUTOSCALE, HN_NOSPACE); return (ret); } From 7362ea6db0737a6f2862faccb11650662a43ed90 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Sun, 10 Feb 2019 13:44:36 +0000 Subject: [PATCH 03/93] Fix the first couple of AddressSanitizer violations in usr.bin/top. Avoid setting zero bytes beyond the length of the 'thisline' parameters in i_process() and u_process(), and don't attempt to memset a negative number of bytes. 
MFC after: 1 week --- usr.bin/top/display.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/usr.bin/top/display.c b/usr.bin/top/display.c index 03277ea86cf..b17be6e8356 100644 --- a/usr.bin/top/display.c +++ b/usr.bin/top/display.c @@ -829,7 +829,11 @@ i_process(int line, char *thisline) } /* truncate the line to conform to our current screen width */ - thisline[screen_width] = '\0'; + int len = strlen(thisline); + if (screen_width < len) + { + thisline[screen_width] = '\0'; + } /* write the line out */ fputs(thisline, stdout); @@ -839,7 +843,10 @@ i_process(int line, char *thisline) p = stpcpy(base, thisline); /* zero fill the rest of it */ - memset(p, 0, screen_width - (p - base)); + if (p - base < screen_width) + { + memset(p, 0, screen_width - (p - base)); + } } void @@ -853,7 +860,11 @@ u_process(int line, char *newline) bufferline = &screenbuf[lineindex(line)]; /* truncate the line to conform to our current screen width */ - newline[screen_width] = '\0'; + int len = strlen(newline); + if (screen_width < len) + { + newline[screen_width] = '\0'; + } /* is line higher than we went on the last display? */ if (line >= last_hi) @@ -878,7 +889,10 @@ u_process(int line, char *newline) optr = stpcpy(bufferline, newline); /* zero fill the rest of it */ - memset(optr, 0, screen_width - (optr - bufferline)); + if (optr - bufferline < screen_width) + { + memset(optr, 0, screen_width - (optr - bufferline)); + } } else { From 507bb10421e36af4b17ed090f9a4a64a096ac25b Mon Sep 17 00:00:00 2001 From: Michael Tuexen Date: Sun, 10 Feb 2019 13:55:32 +0000 Subject: [PATCH 04/93] Fix a locking issue in the IPPROTO_SCTP level SCTP_PEER_ADDR_THLDS socket option. The problem affects only setsockopt with invalid parameters. This issue was found by syzkaller. 
MFC after: 3 days --- sys/netinet/sctp_usrreq.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sys/netinet/sctp_usrreq.c b/sys/netinet/sctp_usrreq.c index 433d02a0898..79109373dbf 100644 --- a/sys/netinet/sctp_usrreq.c +++ b/sys/netinet/sctp_usrreq.c @@ -6335,6 +6335,9 @@ sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, } } if (thlds->spt_pathcpthld != 0xffff) { + if (stcb != NULL) { + SCTP_TCB_UNLOCK(stcb); + } error = EINVAL; SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); break; From d9707e43dffe4f4dc14d3537ea464aad20202981 Mon Sep 17 00:00:00 2001 From: Michael Tuexen Date: Sun, 10 Feb 2019 14:02:14 +0000 Subject: [PATCH 05/93] Fix a locking issue when reporting outbound messages. MFC after: 3 days --- sys/netinet/sctputil.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/netinet/sctputil.c b/sys/netinet/sctputil.c index 5fc57fe139e..cd6fce9e4a4 100644 --- a/sys/netinet/sctputil.c +++ b/sys/netinet/sctputil.c @@ -3946,7 +3946,7 @@ sctp_report_all_outbound(struct sctp_tcb *stcb, uint16_t error, int holds_lock, TAILQ_FOREACH_SAFE(sp, &outs->outqueue, next, nsp) { atomic_subtract_int(&asoc->stream_queue_cnt, 1); TAILQ_REMOVE(&outs->outqueue, sp, next); - stcb->asoc.ss_functions.sctp_ss_remove_from_stream(stcb, asoc, outs, sp, holds_lock); + stcb->asoc.ss_functions.sctp_ss_remove_from_stream(stcb, asoc, outs, sp, 1); sctp_free_spbufspace(stcb, asoc, sp); if (sp->data) { sctp_ulp_notify(SCTP_NOTIFY_SPECIAL_SP_FAIL, stcb, From 74a2bcfa8099ecf863b3aef2d5f436b6ddf9e6ae Mon Sep 17 00:00:00 2001 From: Michal Meloun Date: Sun, 10 Feb 2019 14:25:29 +0000 Subject: [PATCH 06/93] Properly handle alignment requests bigger than page size. - for now, alignments bigger than page size are allowed only for buffers allocated by bus_dmamem_alloc(), cover this fact by KASSERT. - never bounce buffers allocated by bus_dmamem_alloc(), these always comply with the required rules (alignment, boundary, address range). 
MFC after: 1 week Reviewed by: jah PR: 235542 --- sys/arm/arm/busdma_machdep-v6.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/sys/arm/arm/busdma_machdep-v6.c b/sys/arm/arm/busdma_machdep-v6.c index 2aff3f8a150..ce7e8acdfea 100644 --- a/sys/arm/arm/busdma_machdep-v6.c +++ b/sys/arm/arm/busdma_machdep-v6.c @@ -339,16 +339,27 @@ cacheline_bounce(bus_dmamap_t map, bus_addr_t addr, bus_size_t size) * * Note that the addr argument might be either virtual or physical. It doesn't * matter because we only look at the low-order bits, which are the same in both - * address spaces. + * address spaces and maximum alignment of generic buffer is limited up to page + * size. + * Bouncing of buffers allocated by bus_dmamem_alloc()is not necessary, these + * always comply with the required rules (alignment, boundary, and address + * range). */ static __inline int might_bounce(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t addr, bus_size_t size) { - return ((dmat->flags & BUS_DMA_EXCL_BOUNCE) || + KASSERT(dmat->flags & DMAMAP_DMAMEM_ALLOC || + dmat->alignment <= PAGE_SIZE, + ("%s: unsupported alignment (0x%08lx) for buffer not " + "allocated by bus_dmamem_alloc()", + __func__, dmat->alignment)); + + return (!(dmat->flags & DMAMAP_DMAMEM_ALLOC) && + ((dmat->flags & BUS_DMA_EXCL_BOUNCE) || alignment_bounce(dmat, addr) || - cacheline_bounce(map, addr, size)); + cacheline_bounce(map, addr, size))); } /* From e609023c0b650692ab9a39d87d23adb1b38588a2 Mon Sep 17 00:00:00 2001 From: Michal Meloun Date: Sun, 10 Feb 2019 14:30:15 +0000 Subject: [PATCH 07/93] Don't allocate same clock twice.. 
MFC after: 1 week Reported by: jah --- sys/arm/nvidia/tegra_sdhci.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/sys/arm/nvidia/tegra_sdhci.c b/sys/arm/nvidia/tegra_sdhci.c index 2a49a5ae059..cf02486c66f 100644 --- a/sys/arm/nvidia/tegra_sdhci.c +++ b/sys/arm/nvidia/tegra_sdhci.c @@ -311,13 +311,6 @@ tegra_sdhci_attach(device_t dev) gpio_pin_get_by_ofw_property(sc->dev, node, "power-gpios", &sc->gpio_power); - rv = clk_get_by_ofw_index(dev, 0, 0, &sc->clk); - if (rv != 0) { - - device_printf(dev, "Cannot get clock\n"); - goto fail; - } - rv = clk_get_by_ofw_index(dev, 0, 0, &sc->clk); if (rv != 0) { device_printf(dev, "Cannot get clock\n"); From fa50a3552d1e759e1bb65e54cb0b7e863bcf54d5 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Sun, 10 Feb 2019 17:19:45 +0000 Subject: [PATCH 08/93] Implement Address Space Layout Randomization (ASLR) With this change, randomization can be enabled for all non-fixed mappings. It means that the base address for the mapping is selected with a guaranteed amount of entropy (bits). If the mapping was requested to be superpage aligned, the randomization honours the superpage attributes. Although the value of ASLR is diminishing over time as exploit authors work out simple ASLR bypass techniques, it eliminates the trivial exploitation of certain vulnerabilities, at least in theory. This implementation is relatively small and happens at the correct architectural level. Also, it is not expected to introduce regressions in existing cases when turned off (default for now), or cause any significant maintenance burden. The randomization is done on a best-effort basis - that is, the allocator falls back to a first fit strategy if fragmentation prevents entropy injection. It is trivial to implement a strong mode where failure to guarantee the requested amount of entropy results in mapping request failure, but I do not consider that to be usable. I have not fine-tuned the amount of entropy injected right now. 
It is only a quantitative change that will not change the implementation. The current amount is controlled by aslr_pages_rnd. To not spoil coalescing optimizations, to reduce the page table fragmentation inherent to ASLR, and to keep the transient superpage promotion for the malloced memory, locality clustering is implemented for anonymous private mappings, which are automatically grouped until fragmentation kicks in. The initial location for the anon group range is, of course, randomized. This is controlled by vm.cluster_anon, enabled by default. The default mode keeps the sbrk area unpopulated by other mappings, but this can be turned off, which gives much more breathing bits on architectures with small address space, such as i386. This is tied with the question of following an application's hint about the mmap(2) base address. Testing shows that ignoring the hint does not affect the function of common applications, but I would expect more demanding code could break. By default sbrk is preserved and mmap hints are satisfied, which can be changed by using the kern.elf{32,64}.aslr.honor_sbrk sysctl. ASLR is enabled on a per-ABI basis, and currently it is only allowed on FreeBSD native i386 and amd64 (including compat 32bit) ABIs. Support for additional architectures will be added after further testing. Both per-process and per-image controls are implemented: - procctl(2) adds PROC_ASLR_CTL/PROC_ASLR_STATUS; - NT_FREEBSD_FCTL_ASLR_DISABLE feature control note bit makes it possible to force ASLR off for the given binary. (A tool to edit the feature control note is in development.) Global controls are: - kern.elf{32,64}.aslr.enable - for non-fixed mappings done by mmap(2); - kern.elf{32,64}.aslr.pie_enable - for PIE image activation mappings; - kern.elf{32,64}.aslr.honor_sbrk - allow to use sbrk area for mmap(2); - vm.cluster_anon - enables anon mapping clustering. 
PR: 208580 (exp runs) Exp-runs done by: antoine Reviewed by: markj (previous version) Discussed with: emaste Tested by: pho MFC after: 1 month Sponsored by: The FreeBSD Foundation Differential revision: https://reviews.freebsd.org/D5603 --- sys/amd64/amd64/elf_machdep.c | 3 +- sys/arm/arm/elf_machdep.c | 2 +- sys/compat/freebsd32/freebsd32_misc.c | 3 + sys/compat/ia32/ia32_sysvec.c | 2 +- sys/i386/i386/elf_machdep.c | 4 +- sys/kern/imgact_elf.c | 134 +++++++++++++++++++++++++- sys/kern/kern_exec.c | 9 +- sys/kern/kern_fork.c | 3 +- sys/kern/kern_procctl.c | 72 ++++++++++++++ sys/sys/imgact.h | 1 + sys/sys/proc.h | 3 + sys/sys/procctl.h | 7 ++ sys/sys/sysent.h | 1 + sys/vm/vm_map.c | 116 +++++++++++++++++++++- sys/vm/vm_map.h | 4 + usr.bin/proccontrol/proccontrol.c | 32 +++++- 16 files changed, 377 insertions(+), 19 deletions(-) diff --git a/sys/amd64/amd64/elf_machdep.c b/sys/amd64/amd64/elf_machdep.c index f70d86ed1a9..891fd18cdf4 100644 --- a/sys/amd64/amd64/elf_machdep.c +++ b/sys/amd64/amd64/elf_machdep.c @@ -73,7 +73,8 @@ struct sysentvec elf64_freebsd_sysvec = { .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, - .sv_flags = SV_ABI_FREEBSD | SV_LP64 | SV_SHP | SV_TIMEKEEP, + .sv_flags = SV_ABI_FREEBSD | SV_ASLR | SV_LP64 | SV_SHP | + SV_TIMEKEEP, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, diff --git a/sys/arm/arm/elf_machdep.c b/sys/arm/arm/elf_machdep.c index acfa1e6649f..50e53bd9302 100644 --- a/sys/arm/arm/elf_machdep.c +++ b/sys/arm/arm/elf_machdep.c @@ -82,7 +82,7 @@ struct sysentvec elf32_freebsd_sysvec = { .sv_maxssiz = NULL, .sv_flags = #if __ARM_ARCH >= 6 - SV_SHP | SV_TIMEKEEP | + SV_ASLR | SV_SHP | SV_TIMEKEEP | #endif SV_ABI_FREEBSD | SV_ILP32, .sv_set_syscall_retval = cpu_set_syscall_retval, diff --git a/sys/compat/freebsd32/freebsd32_misc.c b/sys/compat/freebsd32/freebsd32_misc.c index c42d32e39d0..f411815dc1a 100644 --- 
a/sys/compat/freebsd32/freebsd32_misc.c +++ b/sys/compat/freebsd32/freebsd32_misc.c @@ -3328,6 +3328,7 @@ freebsd32_procctl(struct thread *td, struct freebsd32_procctl_args *uap) int error, error1, flags, signum; switch (uap->com) { + case PROC_ASLR_CTL: case PROC_SPROTECT: case PROC_TRACE_CTL: case PROC_TRAPCAP_CTL: @@ -3359,6 +3360,7 @@ freebsd32_procctl(struct thread *td, struct freebsd32_procctl_args *uap) return (error); data = &x.rk; break; + case PROC_ASLR_STATUS: case PROC_TRACE_STATUS: case PROC_TRAPCAP_STATUS: data = &flags; @@ -3387,6 +3389,7 @@ freebsd32_procctl(struct thread *td, struct freebsd32_procctl_args *uap) if (error == 0) error = error1; break; + case PROC_ASLR_STATUS: case PROC_TRACE_STATUS: case PROC_TRAPCAP_STATUS: if (error == 0) diff --git a/sys/compat/ia32/ia32_sysvec.c b/sys/compat/ia32/ia32_sysvec.c index 60652514641..07a041711a1 100644 --- a/sys/compat/ia32/ia32_sysvec.c +++ b/sys/compat/ia32/ia32_sysvec.c @@ -119,7 +119,7 @@ struct sysentvec ia32_freebsd_sysvec = { .sv_setregs = ia32_setregs, .sv_fixlimit = ia32_fixlimit, .sv_maxssiz = &ia32_maxssiz, - .sv_flags = SV_ABI_FREEBSD | SV_IA32 | SV_ILP32 | + .sv_flags = SV_ABI_FREEBSD | SV_ASLR | SV_IA32 | SV_ILP32 | SV_SHP | SV_TIMEKEEP, .sv_set_syscall_retval = ia32_set_syscall_retval, .sv_fetch_syscall_args = ia32_fetch_syscall_args, diff --git a/sys/i386/i386/elf_machdep.c b/sys/i386/i386/elf_machdep.c index 59c192f5923..b4532b47fa4 100644 --- a/sys/i386/i386/elf_machdep.c +++ b/sys/i386/i386/elf_machdep.c @@ -75,8 +75,8 @@ struct sysentvec elf32_freebsd_sysvec = { .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, - .sv_flags = SV_ABI_FREEBSD | SV_IA32 | SV_ILP32 | SV_SHP | - SV_TIMEKEEP, + .sv_flags = SV_ABI_FREEBSD | SV_ASLR | SV_IA32 | SV_ILP32 | + SV_SHP | SV_TIMEKEEP, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c index 
06d2e60c40c..c1b4529e1d2 100644 --- a/sys/kern/imgact_elf.c +++ b/sys/kern/imgact_elf.c @@ -136,6 +136,27 @@ SYSCTL_INT(_kern_elf32, OID_AUTO, read_exec, CTLFLAG_RW, &i386_read_exec, 0, "enable execution from readable segments"); #endif +SYSCTL_NODE(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, aslr, CTLFLAG_RW, 0, + ""); +#define ASLR_NODE_OID __CONCAT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), _aslr) + +static int __elfN(aslr_enabled) = 0; +SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, enable, CTLFLAG_RWTUN, + &__elfN(aslr_enabled), 0, + __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) + ": enable address map randomization"); + +static int __elfN(pie_aslr_enabled) = 0; +SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, pie_enable, CTLFLAG_RWTUN, + &__elfN(pie_aslr_enabled), 0, + __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) + ": enable address map randomization for PIE binaries"); + +static int __elfN(aslr_honor_sbrk) = 1; +SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, honor_sbrk, CTLFLAG_RW, + &__elfN(aslr_honor_sbrk), 0, + __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) ": assume sbrk is used"); + static Elf_Brandinfo *elf_brand_list[MAX_BRANDS]; #define trunc_page_ps(va, ps) rounddown2(va, ps) @@ -773,6 +794,36 @@ fail: return (error); } +static u_long +__CONCAT(rnd_, __elfN(base))(vm_map_t map __unused, u_long minv, u_long maxv, + u_int align) +{ + u_long rbase, res; + + MPASS(vm_map_min(map) <= minv); + MPASS(maxv <= vm_map_max(map)); + MPASS(minv < maxv); + MPASS(minv + align < maxv); + arc4rand(&rbase, sizeof(rbase), 0); + res = roundup(minv, (u_long)align) + rbase % (maxv - minv); + res &= ~((u_long)align - 1); + if (res >= maxv) + res -= align; + KASSERT(res >= minv, + ("res %#lx < minv %#lx, maxv %#lx rbase %#lx", + res, minv, maxv, rbase)); + KASSERT(res < maxv, + ("res %#lx > maxv %#lx, minv %#lx rbase %#lx", + res, maxv, minv, rbase)); + return (res); +} + +/* + * Impossible et_dyn_addr initial value indicating that the real base + * must be calculated later with some randomization applied. 
+ */ +#define ET_DYN_ADDR_RAND 1 + static int __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp) { @@ -781,6 +832,7 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp) const Elf_Phdr *phdr; Elf_Auxargs *elf_auxargs; struct vmspace *vmspace; + vm_map_t map; const char *err_str, *newinterp; char *interp, *interp_buf, *path; Elf_Brandinfo *brand_info; @@ -788,6 +840,7 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp) vm_prot_t prot; u_long text_size, data_size, total_size, text_addr, data_addr; u_long seg_size, seg_addr, addr, baddr, et_dyn_addr, entry, proghdr; + u_long maxalign, mapsz, maxv, maxv1; uint32_t fctl0; int32_t osrel; int error, i, n, interp_name_len, have_interp; @@ -831,12 +884,17 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp) err_str = newinterp = NULL; interp = interp_buf = NULL; td = curthread; + maxalign = PAGE_SIZE; + mapsz = 0; for (i = 0; i < hdr->e_phnum; i++) { switch (phdr[i].p_type) { case PT_LOAD: if (n == 0) baddr = phdr[i].p_vaddr; + if (phdr[i].p_align > maxalign) + maxalign = phdr[i].p_align; + mapsz += phdr[i].p_memsz; n++; break; case PT_INTERP: @@ -897,6 +955,7 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp) error = ENOEXEC; goto ret; } + sv = brand_info->sysvec; et_dyn_addr = 0; if (hdr->e_type == ET_DYN) { if ((brand_info->flags & BI_CAN_EXEC_DYN) == 0) { @@ -908,10 +967,18 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp) * Honour the base load address from the dso if it is * non-zero for some reason. 
*/ - if (baddr == 0) - et_dyn_addr = ET_DYN_LOAD_ADDR; + if (baddr == 0) { + if ((sv->sv_flags & SV_ASLR) == 0 || + (fctl0 & NT_FREEBSD_FCTL_ASLR_DISABLE) != 0) + et_dyn_addr = ET_DYN_LOAD_ADDR; + else if ((__elfN(pie_aslr_enabled) && + (imgp->proc->p_flag2 & P2_ASLR_DISABLE) == 0) || + (imgp->proc->p_flag2 & P2_ASLR_ENABLE) != 0) + et_dyn_addr = ET_DYN_ADDR_RAND; + else + et_dyn_addr = ET_DYN_LOAD_ADDR; + } } - sv = brand_info->sysvec; if (interp != NULL && brand_info->interp_newpath != NULL) newinterp = brand_info->interp_newpath; @@ -928,9 +995,54 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp) */ VOP_UNLOCK(imgp->vp, 0); + /* + * Decide whether to enable randomization of user mappings. + * First, reset user preferences for the setid binaries. + * Then, account for the support of the randomization by the + * ABI, by user preferences, and make special treatment for + * PIE binaries. + */ + if (imgp->credential_setid) { + PROC_LOCK(imgp->proc); + imgp->proc->p_flag2 &= ~(P2_ASLR_ENABLE | P2_ASLR_DISABLE); + PROC_UNLOCK(imgp->proc); + } + if ((sv->sv_flags & SV_ASLR) == 0 || + (imgp->proc->p_flag2 & P2_ASLR_DISABLE) != 0 || + (fctl0 & NT_FREEBSD_FCTL_ASLR_DISABLE) != 0) { + KASSERT(et_dyn_addr != ET_DYN_ADDR_RAND, + ("et_dyn_addr == RAND and !ASLR")); + } else if ((imgp->proc->p_flag2 & P2_ASLR_ENABLE) != 0 || + (__elfN(aslr_enabled) && hdr->e_type == ET_EXEC) || + et_dyn_addr == ET_DYN_ADDR_RAND) { + imgp->map_flags |= MAP_ASLR; + /* + * If user does not care about sbrk, utilize the bss + * grow region for mappings as well. We can select + * the base for the image anywere and still not suffer + * from the fragmentation. 
+ */ + if (!__elfN(aslr_honor_sbrk) || + (imgp->proc->p_flag2 & P2_ASLR_IGNSTART) != 0) + imgp->map_flags |= MAP_ASLR_IGNSTART; + } + error = exec_new_vmspace(imgp, sv); + vmspace = imgp->proc->p_vmspace; + map = &vmspace->vm_map; + imgp->proc->p_sysent = sv; + maxv = vm_map_max(map) - lim_max(td, RLIMIT_STACK); + if (et_dyn_addr == ET_DYN_ADDR_RAND) { + KASSERT((map->flags & MAP_ASLR) != 0, + ("ET_DYN_ADDR_RAND but !MAP_ASLR")); + et_dyn_addr = __CONCAT(rnd_, __elfN(base))(map, + vm_map_min(map) + mapsz + lim_max(td, RLIMIT_DATA), + /* reserve half of the address space to interpreter */ + maxv / 2, 1UL << flsl(maxalign)); + } + vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY); if (error != 0) goto ret; @@ -1022,7 +1134,6 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp) goto ret; } - vmspace = imgp->proc->p_vmspace; vmspace->vm_tsize = text_size >> PAGE_SHIFT; vmspace->vm_taddr = (caddr_t)(uintptr_t)text_addr; vmspace->vm_dsize = data_size >> PAGE_SHIFT; @@ -1036,6 +1147,14 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp) */ addr = round_page((vm_offset_t)vmspace->vm_daddr + lim_max(td, RLIMIT_DATA)); + if ((map->flags & MAP_ASLR) != 0) { + maxv1 = maxv / 2 + addr / 2; + MPASS(maxv1 >= addr); /* No overflow */ + map->anon_loc = __CONCAT(rnd_, __elfN(base))(map, addr, maxv1, + MAXPAGESIZES > 1 ? 
pagesizes[1] : pagesizes[0]); + } else { + map->anon_loc = addr; + } PROC_UNLOCK(imgp->proc); imgp->entry_addr = entry; @@ -1043,6 +1162,13 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp) if (interp != NULL) { have_interp = FALSE; VOP_UNLOCK(imgp->vp, 0); + if ((map->flags & MAP_ASLR) != 0) { + /* Assume that interpeter fits into 1/4 of AS */ + maxv1 = maxv / 2 + addr / 2; + MPASS(maxv1 >= addr); /* No overflow */ + addr = __CONCAT(rnd_, __elfN(base))(map, addr, + maxv1, PAGE_SIZE); + } if (brand_info->emul_path != NULL && brand_info->emul_path[0] != '\0') { path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK); diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c index 29d16e5706e..6bef3f092e1 100644 --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -1104,9 +1104,13 @@ exec_new_vmspace(struct image_params *imgp, struct sysentvec *sv) shmexit(vmspace); pmap_remove_pages(vmspace_pmap(vmspace)); vm_map_remove(map, vm_map_min(map), vm_map_max(map)); - /* An exec terminates mlockall(MCL_FUTURE). */ + /* + * An exec terminates mlockall(MCL_FUTURE), ASLR state + * must be re-evaluated. + */ vm_map_lock(map); - vm_map_modflags(map, 0, MAP_WIREFUTURE); + vm_map_modflags(map, 0, MAP_WIREFUTURE | MAP_ASLR | + MAP_ASLR_IGNSTART); vm_map_unlock(map); } else { error = vmspace_exec(p, sv_minuser, sv->sv_maxuser); @@ -1115,6 +1119,7 @@ exec_new_vmspace(struct image_params *imgp, struct sysentvec *sv) vmspace = p->p_vmspace; map = &vmspace->vm_map; } + map->flags |= imgp->map_flags; /* Map a shared page */ obj = sv->sv_shared_page_obj; diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c index a36fda16415..39307a573bb 100644 --- a/sys/kern/kern_fork.c +++ b/sys/kern/kern_fork.c @@ -466,7 +466,8 @@ do_fork(struct thread *td, struct fork_req *fr, struct proc *p2, struct thread * * Increase reference counts on shared objects. 
*/ p2->p_flag = P_INMEM; - p2->p_flag2 = p1->p_flag2 & (P2_NOTRACE | P2_NOTRACE_EXEC | P2_TRAPCAP); + p2->p_flag2 = p1->p_flag2 & (P2_ASLR_DISABLE | P2_ASLR_ENABLE | + P2_ASLR_IGNSTART | P2_NOTRACE | P2_NOTRACE_EXEC | P2_TRAPCAP); p2->p_swtick = ticks; if (p1->p_flag & P_PROFIL) startprofclock(p2); diff --git a/sys/kern/kern_procctl.c b/sys/kern/kern_procctl.c index 4c3569afdfd..b0209432299 100644 --- a/sys/kern/kern_procctl.c +++ b/sys/kern/kern_procctl.c @@ -43,6 +43,11 @@ __FBSDID("$FreeBSD$"); #include #include +#include +#include +#include +#include + static int protect_setchild(struct thread *td, struct proc *p, int flags) { @@ -413,6 +418,62 @@ trapcap_status(struct thread *td, struct proc *p, int *data) return (0); } +static int +aslr_ctl(struct thread *td, struct proc *p, int state) +{ + + PROC_LOCK_ASSERT(p, MA_OWNED); + + switch (state) { + case PROC_ASLR_FORCE_ENABLE: + p->p_flag2 &= ~P2_ASLR_DISABLE; + p->p_flag2 |= P2_ASLR_ENABLE; + break; + case PROC_ASLR_FORCE_DISABLE: + p->p_flag2 |= P2_ASLR_DISABLE; + p->p_flag2 &= ~P2_ASLR_ENABLE; + break; + case PROC_ASLR_NOFORCE: + p->p_flag2 &= ~(P2_ASLR_ENABLE | P2_ASLR_DISABLE); + break; + default: + return (EINVAL); + } + return (0); +} + +static int +aslr_status(struct thread *td, struct proc *p, int *data) +{ + struct vmspace *vm; + int d; + + switch (p->p_flag2 & (P2_ASLR_ENABLE | P2_ASLR_DISABLE)) { + case 0: + d = PROC_ASLR_NOFORCE; + break; + case P2_ASLR_ENABLE: + d = PROC_ASLR_FORCE_ENABLE; + break; + case P2_ASLR_DISABLE: + d = PROC_ASLR_FORCE_DISABLE; + break; + } + if ((p->p_flag & P_WEXIT) == 0) { + _PHOLD(p); + PROC_UNLOCK(p); + vm = vmspace_acquire_ref(p); + if (vm != NULL && (vm->vm_map.flags & MAP_ASLR) != 0) { + d |= PROC_ASLR_ACTIVE; + vmspace_free(vm); + } + PROC_LOCK(p); + _PRELE(p); + } + *data = d; + return (0); +} + #ifndef _SYS_SYSPROTO_H_ struct procctl_args { idtype_t idtype; @@ -434,6 +495,7 @@ sys_procctl(struct thread *td, struct procctl_args *uap) int error, error1, flags, 
signum; switch (uap->com) { + case PROC_ASLR_CTL: case PROC_SPROTECT: case PROC_TRACE_CTL: case PROC_TRAPCAP_CTL: @@ -463,6 +525,7 @@ sys_procctl(struct thread *td, struct procctl_args *uap) return (error); data = &x.rk; break; + case PROC_ASLR_STATUS: case PROC_TRACE_STATUS: case PROC_TRAPCAP_STATUS: data = &flags; @@ -490,6 +553,7 @@ sys_procctl(struct thread *td, struct procctl_args *uap) if (error == 0) error = error1; break; + case PROC_ASLR_STATUS: case PROC_TRACE_STATUS: case PROC_TRAPCAP_STATUS: if (error == 0) @@ -509,6 +573,10 @@ kern_procctl_single(struct thread *td, struct proc *p, int com, void *data) PROC_LOCK_ASSERT(p, MA_OWNED); switch (com) { + case PROC_ASLR_CTL: + return (aslr_ctl(td, p, *(int *)data)); + case PROC_ASLR_STATUS: + return (aslr_status(td, p, data)); case PROC_SPROTECT: return (protect_set(td, p, *(int *)data)); case PROC_REAP_ACQUIRE: @@ -544,6 +612,8 @@ kern_procctl(struct thread *td, idtype_t idtype, id_t id, int com, void *data) bool tree_locked; switch (com) { + case PROC_ASLR_CTL: + case PROC_ASLR_STATUS: case PROC_REAP_ACQUIRE: case PROC_REAP_RELEASE: case PROC_REAP_STATUS: @@ -593,6 +663,8 @@ kern_procctl(struct thread *td, idtype_t idtype, id_t id, int com, void *data) sx_xlock(&proctree_lock); tree_locked = true; break; + case PROC_ASLR_CTL: + case PROC_ASLR_STATUS: case PROC_TRACE_STATUS: case PROC_TRAPCAP_STATUS: tree_locked = false; diff --git a/sys/sys/imgact.h b/sys/sys/imgact.h index 87d8fa84918..770ecfea491 100644 --- a/sys/sys/imgact.h +++ b/sys/sys/imgact.h @@ -89,6 +89,7 @@ struct image_params { u_long stack_sz; struct ucred *newcred; /* new credentials if changing */ bool credential_setid; /* true if becoming setid */ + u_int map_flags; }; #ifdef _KERNEL diff --git a/sys/sys/proc.h b/sys/sys/proc.h index 21b8a710701..7e67ec48e0a 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -756,6 +756,9 @@ struct proc { #define P2_AST_SU 0x00000008 /* Handles SU ast for kthreads. 
*/ #define P2_PTRACE_FSTP 0x00000010 /* SIGSTOP from PT_ATTACH not yet handled. */ #define P2_TRAPCAP 0x00000020 /* SIGTRAP on ENOTCAPABLE */ +#define P2_ASLR_ENABLE 0x00000040 /* Force enable ASLR. */ +#define P2_ASLR_DISABLE 0x00000080 /* Force disable ASLR. */ +#define P2_ASLR_IGNSTART 0x00000100 /* Enable ASLR to consume sbrk area. */ /* Flags protected by proctree_lock, kept in p_treeflags. */ #define P_TREE_ORPHANED 0x00000001 /* Reparented, on orphan list */ diff --git a/sys/sys/procctl.h b/sys/sys/procctl.h index 3a2d4f0269e..1f519454e96 100644 --- a/sys/sys/procctl.h +++ b/sys/sys/procctl.h @@ -53,6 +53,8 @@ #define PROC_TRAPCAP_STATUS 10 /* query trap capability status */ #define PROC_PDEATHSIG_CTL 11 /* set parent death signal */ #define PROC_PDEATHSIG_STATUS 12 /* get parent death signal */ +#define PROC_ASLR_CTL 13 /* en/dis ASLR */ +#define PROC_ASLR_STATUS 14 /* query ASLR status */ /* Operations for PROC_SPROTECT (passed in integer arg). */ #define PPROT_OP(x) ((x) & 0xf) @@ -116,6 +118,11 @@ struct procctl_reaper_kill { #define PROC_TRAPCAP_CTL_ENABLE 1 #define PROC_TRAPCAP_CTL_DISABLE 2 +#define PROC_ASLR_FORCE_ENABLE 1 +#define PROC_ASLR_FORCE_DISABLE 2 +#define PROC_ASLR_NOFORCE 3 +#define PROC_ASLR_ACTIVE 0x80000000 + #ifndef _KERNEL __BEGIN_DECLS int procctl(idtype_t, id_t, int, void *); diff --git a/sys/sys/sysent.h b/sys/sys/sysent.h index 3afaf01449c..6f89d39c0f8 100644 --- a/sys/sys/sysent.h +++ b/sys/sys/sysent.h @@ -144,6 +144,7 @@ struct sysentvec { #define SV_SHP 0x010000 /* Shared page. */ #define SV_CAPSICUM 0x020000 /* Force cap_enter() on startup. */ #define SV_TIMEKEEP 0x040000 /* Shared page timehands. */ +#define SV_ASLR 0x080000 /* ASLR allowed. */ #define SV_ABI_MASK 0xff #define SV_ABI_ERRNO(p, e) ((p)->p_sysent->sv_errsize <= 0 ? 
e : \ diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c index ad08d212ebc..01544c8bf00 100644 --- a/sys/vm/vm_map.c +++ b/sys/vm/vm_map.c @@ -801,6 +801,7 @@ _vm_map_init(vm_map_t map, pmap_t pmap, vm_offset_t min, vm_offset_t max) map->root = NULL; map->timestamp = 0; map->busy = 0; + map->anon_loc = 0; } void @@ -1480,6 +1481,21 @@ vm_map_fixed(vm_map_t map, vm_object_t object, vm_ooffset_t offset, return (result); } +static const int aslr_pages_rnd_64[2] = {0x1000, 0x10}; +static const int aslr_pages_rnd_32[2] = {0x100, 0x4}; + +static int cluster_anon = 1; +SYSCTL_INT(_vm, OID_AUTO, cluster_anon, CTLFLAG_RW, + &cluster_anon, 0, + "Cluster anonymous mappings"); + +static long aslr_restarts; +SYSCTL_LONG(_vm, OID_AUTO, aslr_restarts, CTLFLAG_RD, + &aslr_restarts, 0, + "Number of aslr failures"); + +#define MAP_32BIT_MAX_ADDR ((vm_offset_t)1 << 31) + /* * Searches for the specified amount of free space in the given map with the * specified alignment. Performs an address-ordered, first-fit search from @@ -1559,8 +1575,9 @@ vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset, vm_size_t length, vm_offset_t max_addr, int find_space, vm_prot_t prot, vm_prot_t max, int cow) { - vm_offset_t alignment, min_addr; - int rv; + vm_offset_t alignment, curr_min_addr, min_addr; + int gap, pidx, rv, try; + bool cluster, en_aslr, update_anon; KASSERT((cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) == 0 || object == NULL, @@ -1575,24 +1592,96 @@ vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset, alignment = (vm_offset_t)1 << (find_space >> 8); } else alignment = 0; + en_aslr = (map->flags & MAP_ASLR) != 0; + update_anon = cluster = cluster_anon != 0 && + (map->flags & MAP_IS_SUB_MAP) == 0 && max_addr == 0 && + find_space != VMFS_NO_SPACE && object == NULL && + (cow & (MAP_INHERIT_SHARE | MAP_STACK_GROWS_UP | + MAP_STACK_GROWS_DOWN)) == 0 && prot != PROT_NONE; + curr_min_addr = min_addr = *addr; + if (en_aslr && min_addr == 0 && !cluster && + 
find_space != VMFS_NO_SPACE && + (map->flags & MAP_ASLR_IGNSTART) != 0) + curr_min_addr = min_addr = vm_map_min(map); + try = 0; vm_map_lock(map); + if (cluster) { + curr_min_addr = map->anon_loc; + if (curr_min_addr == 0) + cluster = false; + } if (find_space != VMFS_NO_SPACE) { KASSERT(find_space == VMFS_ANY_SPACE || find_space == VMFS_OPTIMAL_SPACE || find_space == VMFS_SUPER_SPACE || alignment != 0, ("unexpected VMFS flag")); - min_addr = *addr; again: - if (vm_map_findspace(map, min_addr, length, addr) || + /* + * When creating an anonymous mapping, try clustering + * with an existing anonymous mapping first. + * + * We make up to two attempts to find address space + * for a given find_space value. The first attempt may + * apply randomization or may cluster with an existing + * anonymous mapping. If this first attempt fails, + * perform a first-fit search of the available address + * space. + * + * If all tries failed, and find_space is + * VMFS_OPTIMAL_SPACE, fallback to VMFS_ANY_SPACE. + * Again enable clustering and randomization. + */ + try++; + MPASS(try <= 2); + + if (try == 2) { + /* + * Second try: we failed either to find a + * suitable region for randomizing the + * allocation, or to cluster with an existing + * mapping. Retry with free run. + */ + curr_min_addr = (map->flags & MAP_ASLR_IGNSTART) != 0 ? + vm_map_min(map) : min_addr; + atomic_add_long(&aslr_restarts, 1); + } + + if (try == 1 && en_aslr && !cluster) { + /* + * Find space for allocation, including + * gap needed for later randomization. + */ + pidx = MAXPAGESIZES > 1 && pagesizes[1] != 0 && + (find_space == VMFS_SUPER_SPACE || find_space == + VMFS_OPTIMAL_SPACE) ? 1 : 0; + gap = vm_map_max(map) > MAP_32BIT_MAX_ADDR && + (max_addr == 0 || max_addr > MAP_32BIT_MAX_ADDR) ? 
+ aslr_pages_rnd_64[pidx] : aslr_pages_rnd_32[pidx]; + if (vm_map_findspace(map, curr_min_addr, length + + gap * pagesizes[pidx], addr) || + (max_addr != 0 && *addr + length > max_addr)) + goto again; + /* And randomize the start address. */ + *addr += (arc4random() % gap) * pagesizes[pidx]; + } else if (vm_map_findspace(map, curr_min_addr, length, addr) || (max_addr != 0 && *addr + length > max_addr)) { + if (cluster) { + cluster = false; + MPASS(try == 1); + goto again; + } rv = KERN_NO_SPACE; goto done; } + if (find_space != VMFS_ANY_SPACE && (rv = vm_map_alignspace(map, object, offset, addr, length, max_addr, alignment)) != KERN_SUCCESS) { if (find_space == VMFS_OPTIMAL_SPACE) { find_space = VMFS_ANY_SPACE; + curr_min_addr = min_addr; + cluster = update_anon; + try = 0; goto again; } goto done; @@ -1613,6 +1702,8 @@ again: rv = vm_map_insert(map, object, offset, *addr, *addr + length, prot, max, cow); } + if (rv == KERN_SUCCESS && update_anon) + map->anon_loc = *addr + length; done: vm_map_unlock(map); return (rv); @@ -1922,7 +2013,13 @@ vm_map_submap( vm_map_t submap) { vm_map_entry_t entry; - int result = KERN_INVALID_ARGUMENT; + int result; + + result = KERN_INVALID_ARGUMENT; + + vm_map_lock(submap); + submap->flags |= MAP_IS_SUB_MAP; + vm_map_unlock(submap); vm_map_lock(map); @@ -1944,6 +2041,11 @@ vm_map_submap( } vm_map_unlock(map); + if (result != KERN_SUCCESS) { + vm_map_lock(submap); + submap->flags &= ~MAP_IS_SUB_MAP; + vm_map_unlock(submap); + } return (result); } @@ -3170,6 +3272,9 @@ vm_map_delete(vm_map_t map, vm_offset_t start, vm_offset_t end) entry->object.vm_object != NULL) pmap_remove(map->pmap, entry->start, entry->end); + if (entry->end == map->anon_loc) + map->anon_loc = entry->start; + /* * Delete the entry only after removing all pmap * entries pointing to its pages. 
(Otherwise, its @@ -3443,6 +3548,7 @@ vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_charge) locked = vm_map_trylock(new_map); /* trylock to silence WITNESS */ KASSERT(locked, ("vmspace_fork: lock failed")); + new_map->anon_loc = old_map->anon_loc; old_entry = old_map->header.next; while (old_entry != &old_map->header) { diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h index c83a68ba589..6e0f3729328 100644 --- a/sys/vm/vm_map.h +++ b/sys/vm/vm_map.h @@ -202,6 +202,7 @@ struct vm_map { vm_flags_t flags; /* flags for this vm_map */ vm_map_entry_t root; /* Root of a binary search tree */ pmap_t pmap; /* (c) Physical map */ + vm_offset_t anon_loc; int busy; }; @@ -210,6 +211,9 @@ struct vm_map { */ #define MAP_WIREFUTURE 0x01 /* wire all future pages */ #define MAP_BUSY_WAKEUP 0x02 +#define MAP_IS_SUB_MAP 0x04 /* has parent */ +#define MAP_ASLR 0x08 /* enabled ASLR */ +#define MAP_ASLR_IGNSTART 0x10 #ifdef _KERNEL #if defined(KLD_MODULE) && !defined(KLD_TIED) diff --git a/usr.bin/proccontrol/proccontrol.c b/usr.bin/proccontrol/proccontrol.c index 4cb37018c41..3c0ad53e752 100644 --- a/usr.bin/proccontrol/proccontrol.c +++ b/usr.bin/proccontrol/proccontrol.c @@ -39,6 +39,7 @@ __FBSDID("$FreeBSD$"); #include enum { + MODE_ASLR, MODE_INVALID, MODE_TRACE, MODE_TRAPCAP, @@ -62,7 +63,7 @@ static void __dead2 usage(void) { - fprintf(stderr, "Usage: proccontrol -m (trace|trapcap) [-q] " + fprintf(stderr, "Usage: proccontrol -m (aslr|trace|trapcap) [-q] " "[-s (enable|disable)] [-p pid | command]\n"); exit(1); } @@ -81,7 +82,9 @@ main(int argc, char *argv[]) while ((ch = getopt(argc, argv, "m:qs:p:")) != -1) { switch (ch) { case 'm': - if (strcmp(optarg, "trace") == 0) + if (strcmp(optarg, "aslr") == 0) + mode = MODE_ASLR; + else if (strcmp(optarg, "trace") == 0) mode = MODE_TRACE; else if (strcmp(optarg, "trapcap") == 0) mode = MODE_TRAPCAP; @@ -121,6 +124,9 @@ main(int argc, char *argv[]) if (query) { switch (mode) { + case MODE_ASLR: + error = procctl(P_PID, pid, 
PROC_ASLR_STATUS, &arg); + break; case MODE_TRACE: error = procctl(P_PID, pid, PROC_TRACE_STATUS, &arg); break; @@ -134,6 +140,23 @@ main(int argc, char *argv[]) if (error != 0) err(1, "procctl status"); switch (mode) { + case MODE_ASLR: + switch (arg & ~PROC_ASLR_ACTIVE) { + case PROC_ASLR_FORCE_ENABLE: + printf("force enabled"); + break; + case PROC_ASLR_FORCE_DISABLE: + printf("force disabled"); + break; + case PROC_ASLR_NOFORCE: + printf("not forced"); + break; + } + if ((arg & PROC_ASLR_ACTIVE) != 0) + printf(", active\n"); + else + printf(", not active\n"); + break; case MODE_TRACE: if (arg == -1) printf("disabled\n"); @@ -155,6 +178,11 @@ main(int argc, char *argv[]) } } else { switch (mode) { + case MODE_ASLR: + arg = enable ? PROC_ASLR_FORCE_ENABLE : + PROC_ASLR_FORCE_DISABLE; + error = procctl(P_PID, pid, PROC_ASLR_CTL, &arg); + break; case MODE_TRACE: arg = enable ? PROC_TRACE_CTL_ENABLE : PROC_TRACE_CTL_DISABLE; From 9492d971eb62e215750ebbfeb060029d3e5d5436 Mon Sep 17 00:00:00 2001 From: Michal Meloun Date: Sun, 10 Feb 2019 18:28:37 +0000 Subject: [PATCH 09/93] Fix bug introduced by r343962. DMAMAP_DMAMEM_ALLOC is property of dmamap, not dmatag. 
MFC after: 1 week Reported by: ian Pointy hat: mmel --- sys/arm/arm/busdma_machdep-v6.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sys/arm/arm/busdma_machdep-v6.c b/sys/arm/arm/busdma_machdep-v6.c index ce7e8acdfea..e54f3c0c7ca 100644 --- a/sys/arm/arm/busdma_machdep-v6.c +++ b/sys/arm/arm/busdma_machdep-v6.c @@ -350,13 +350,13 @@ might_bounce(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t addr, bus_size_t size) { - KASSERT(dmat->flags & DMAMAP_DMAMEM_ALLOC || + KASSERT(map->flags & DMAMAP_DMAMEM_ALLOC || dmat->alignment <= PAGE_SIZE, ("%s: unsupported alignment (0x%08lx) for buffer not " "allocated by bus_dmamem_alloc()", __func__, dmat->alignment)); - return (!(dmat->flags & DMAMAP_DMAMEM_ALLOC) && + return (!(map->flags & DMAMAP_DMAMEM_ALLOC) && ((dmat->flags & BUS_DMA_EXCL_BOUNCE) || alignment_bounce(dmat, addr) || cacheline_bounce(map, addr, size))); From f6d281e8aaad3f89c48b5c4b637fce625e8e5a87 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Sun, 10 Feb 2019 19:01:05 +0000 Subject: [PATCH 10/93] struct xswdev on amd64 requires compat32 shims after ino64. i386 is the only architecture where uint64_t does not specify 8-bytes alignment, which makes struct xswdev layout not compatible between 64bit and i386. 
Reported and tested by: pho Sponsored by: The FreeBSD Foundation MFC after: 1 week --- sys/vm/swap_pager.c | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c index ea0a61ab276..99332a45325 100644 --- a/sys/vm/swap_pager.c +++ b/sys/vm/swap_pager.c @@ -2478,10 +2478,23 @@ struct xswdev11 { }; #endif +#if defined(__amd64__) && defined(COMPAT_FREEBSD32) +struct xswdev32 { + u_int xsw_version; + u_int xsw_dev1, xsw_dev2; + int xsw_flags; + int xsw_nblks; + int xsw_used; +}; +#endif + static int sysctl_vm_swap_info(SYSCTL_HANDLER_ARGS) { struct xswdev xs; +#if defined(__amd64__) && defined(COMPAT_FREEBSD32) + struct xswdev32 xs32; +#endif #if defined(COMPAT_FREEBSD11) struct xswdev11 xs11; #endif @@ -2492,6 +2505,18 @@ sysctl_vm_swap_info(SYSCTL_HANDLER_ARGS) error = swap_dev_info(*(int *)arg1, &xs, NULL, 0); if (error != 0) return (error); +#if defined(__amd64__) && defined(COMPAT_FREEBSD32) + if (req->oldlen == sizeof(xs32)) { + xs32.xsw_version = XSWDEV_VERSION; + xs32.xsw_dev1 = xs.xsw_dev; + xs32.xsw_dev2 = xs.xsw_dev >> 32; + xs32.xsw_flags = xs.xsw_flags; + xs32.xsw_nblks = xs.xsw_nblks; + xs32.xsw_used = xs.xsw_used; + error = SYSCTL_OUT(req, &xs32, sizeof(xs32)); + return (error); + } +#endif #if defined(COMPAT_FREEBSD11) if (req->oldlen == sizeof(xs11)) { xs11.xsw_version = XSWDEV_VERSION_11; @@ -2500,9 +2525,10 @@ sysctl_vm_swap_info(SYSCTL_HANDLER_ARGS) xs11.xsw_nblks = xs.xsw_nblks; xs11.xsw_used = xs.xsw_used; error = SYSCTL_OUT(req, &xs11, sizeof(xs11)); - } else + return (error); + } #endif - error = SYSCTL_OUT(req, &xs, sizeof(xs)); + error = SYSCTL_OUT(req, &xs, sizeof(xs)); return (error); } From 9b9a527843591d3e269e68c42932820759d2f416 Mon Sep 17 00:00:00 2001 From: Sergey Kandaurov Date: Sun, 10 Feb 2019 19:07:47 +0000 Subject: [PATCH 11/93] Sync "struct addrinfo" declaration with netdb.h. 
Notably, unlike in OpenBSD, which the man page was copied from, ai_canonname and ai_addr come in different order. PR: 225880 MFC after: 1 week --- lib/libc/net/getaddrinfo.3 | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/lib/libc/net/getaddrinfo.3 b/lib/libc/net/getaddrinfo.3 index fdfaddb98b0..bd0084db2b3 100644 --- a/lib/libc/net/getaddrinfo.3 +++ b/lib/libc/net/getaddrinfo.3 @@ -18,7 +18,7 @@ .\" .\" $FreeBSD$ .\" -.Dd September 13, 2017 +.Dd February 10, 2019 .Dt GETADDRINFO 3 .Os .Sh NAME @@ -78,14 +78,14 @@ as defined by .Aq Pa netdb.h : .Bd -literal struct addrinfo { - int ai_flags; /* input flags */ - int ai_family; /* address family for socket */ - int ai_socktype; /* socket type */ - int ai_protocol; /* protocol for socket */ - socklen_t ai_addrlen; /* length of socket-address */ - struct sockaddr *ai_addr; /* socket-address for socket */ - char *ai_canonname; /* canonical name for service location */ - struct addrinfo *ai_next; /* pointer to next in list */ + int ai_flags; /* AI_PASSIVE, AI_CANONNAME, .. */ + int ai_family; /* AF_xxx */ + int ai_socktype; /* SOCK_xxx */ + int ai_protocol; /* 0 or IPPROTO_xxx for IPv4 and IPv6 */ + socklen_t ai_addrlen; /* length of ai_addr */ + char *ai_canonname; /* canonical name for hostname */ + struct sockaddr *ai_addr; /* binary address */ + struct addrinfo *ai_next; /* next structure in linked list */ }; .Ed .Pp From 3c25d4ea3c76f81f266c9fbf14621144d98cc707 Mon Sep 17 00:00:00 2001 From: Navdeep Parhar Date: Sun, 10 Feb 2019 19:20:03 +0000 Subject: [PATCH 12/93] cxgbe(4): Ignore unused interrupts. 
Sponsored by: Chelsio Communications --- sys/dev/cxgbe/common/t4_hw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sys/dev/cxgbe/common/t4_hw.c b/sys/dev/cxgbe/common/t4_hw.c index 39c2aae6357..5c587f04da7 100644 --- a/sys/dev/cxgbe/common/t4_hw.c +++ b/sys/dev/cxgbe/common/t4_hw.c @@ -5306,6 +5306,7 @@ void t4_intr_enable(struct adapter *adap) F_EGRESS_SIZE_ERR; t4_set_reg_field(adap, A_SGE_INT_ENABLE3, val, val); t4_write_reg(adap, MYPF_REG(A_PL_PF_INT_ENABLE), PF_INTR_MASK); + t4_set_reg_field(adap, A_PL_INT_ENABLE, F_SF | F_I2CM, 0); t4_set_reg_field(adap, A_PL_INT_MAP0, 0, 1 << adap->pf); } From f68992cf666e80664c62e32af24eb5280c74dcd3 Mon Sep 17 00:00:00 2001 From: Nathan Whitehorn Date: Sun, 10 Feb 2019 20:13:59 +0000 Subject: [PATCH 13/93] Performance improvements for octe(4): - Distribute RX load across multiple cores, if present. This reverts r217212, which is no longer relevant (I think because of the newer SDK). - Use newer APIs for pinning taskqueue entries to specific cores. - Deepen RX buffers. This more than doubles NAT forwarding throughput on my EdgeRouter Lite from, with typical packet mixture, 90 Mbps to over 200 Mbps. The result matches forwarding throughput in Linux without the UBNT hardware offload on the same hardware, and thus likely reflects hardware limits. Reviewed by: jhibbits --- sys/mips/cavium/octe/ethernet-defines.h | 4 ++-- sys/mips/cavium/octe/ethernet-rx.c | 25 ++++++++++--------------- sys/mips/cavium/octe/ethernet.c | 4 ++-- 3 files changed, 14 insertions(+), 19 deletions(-) diff --git a/sys/mips/cavium/octe/ethernet-defines.h b/sys/mips/cavium/octe/ethernet-defines.h index 308233f9ff2..1be9d54b01e 100644 --- a/sys/mips/cavium/octe/ethernet-defines.h +++ b/sys/mips/cavium/octe/ethernet-defines.h @@ -38,14 +38,14 @@ AND WITH ALL FAULTS AND CAVIUM NETWORKS MAKES NO PROMISES, REPRESENTATIONS OR W * the driver uses the default from below. 
*/ -#define INTERRUPT_LIMIT 10000 /* Max interrupts per second per core */ +#define INTERRUPT_LIMIT 1000 /* Max interrupts per second per core */ /*#define INTERRUPT_LIMIT 0 *//* Don't limit the number of interrupts */ #define USE_RED 1 /* Enable Random Early Dropping under load */ #define USE_10MBPS_PREAMBLE_WORKAROUND 1 /* Allow SW based preamble removal at 10Mbps to workaround PHYs giving us bad preambles */ #define DONT_WRITEBACK(x) (x) /* Use this to have all FPA frees also tell the L2 not to write data to memory */ /*#define DONT_WRITEBACK(x) 0 *//* Use this to not have FPA frees control L2 */ -#define MAX_RX_PACKETS 120 /* Maximum number of packets to process per interrupt. */ +#define MAX_RX_PACKETS 1024 /* Maximum number of packets to process per interrupt. */ #define MAX_OUT_QUEUE_DEPTH 1000 #define FAU_NUM_PACKET_BUFFERS_TO_FREE (CVMX_FAU_REG_END - sizeof(uint32_t)) diff --git a/sys/mips/cavium/octe/ethernet-rx.c b/sys/mips/cavium/octe/ethernet-rx.c index 5c07da49c4b..159f529e5e6 100644 --- a/sys/mips/cavium/octe/ethernet-rx.c +++ b/sys/mips/cavium/octe/ethernet-rx.c @@ -57,8 +57,6 @@ extern struct ifnet *cvm_oct_device[]; static struct task cvm_oct_task; static struct taskqueue *cvm_oct_taskq; -static int cvm_oct_rx_active; - /** * Interrupt handler. The interrupt occurs whenever the POW * transitions from 0->1 packets in our group. @@ -77,10 +75,9 @@ int cvm_oct_do_interrupt(void *dev_id) cvmx_write_csr(CVMX_POW_WQ_INT, 0x10001< Date: Sun, 10 Feb 2019 20:21:20 +0000 Subject: [PATCH 14/93] powerpc: Clamp MAXCPU for MPC85XXSPE kernel to 2 SoCs with e500v2 chips only have at most 2 cores, and there are no plans to release any more e500v2-based SoCs. Clamping MAXCPU down to 2 saves 5MB of data, and 1.5MB bss. 
--- sys/powerpc/conf/MPC85XXSPE | 1 + 1 file changed, 1 insertion(+) diff --git a/sys/powerpc/conf/MPC85XXSPE b/sys/powerpc/conf/MPC85XXSPE index 08b1eee4342..8a7d0e74f6b 100644 --- a/sys/powerpc/conf/MPC85XXSPE +++ b/sys/powerpc/conf/MPC85XXSPE @@ -16,6 +16,7 @@ makeoptions WITH_CTF=1 makeoptions WERROR="-Werror -Wno-format -Wno-redundant-decls" options FPU_EMU +options MAXCPU=2 options _KPOSIX_PRIORITY_SCHEDULING options ALT_BREAK_TO_DEBUGGER From 13c62c50e345a24aa0a4a14c7eb167ec668266bd Mon Sep 17 00:00:00 2001 From: Kyle Evans Date: Sun, 10 Feb 2019 21:19:09 +0000 Subject: [PATCH 15/93] libbe(3): Add a destroy option for removing the origin Currently origin snapshots are left behind when a BE is destroyed, whether it was an auto-created snapshot or explicitly specified via, for example, `bectl create -e be@mysnap ...`. Removing it automatically could be argued as a POLA violation in some circumstances, so provide a flag to be_destroy for it. An accompanying option will be added to bectl(8) to utilize this. Some minor style/consistency nits in the affected areas also addressed. 
Reported by: Shawn Webb MFC after: 1 week --- lib/libbe/be.c | 26 +++++++++++++++++++++----- lib/libbe/be.h | 5 +++-- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/lib/libbe/be.c b/lib/libbe/be.c index b07e791805b..cf13e9fec9b 100644 --- a/lib/libbe/be.c +++ b/lib/libbe/be.c @@ -203,13 +203,14 @@ be_destroy_cb(zfs_handle_t *zfs_hdl, void *data) int be_destroy(libbe_handle_t *lbh, const char *name, int options) { + char origin[BE_MAXPATHLEN], path[BE_MAXPATHLEN]; zfs_handle_t *fs; - char path[BE_MAXPATHLEN]; char *p; int err, force, mounted; p = path; force = options & BE_DESTROY_FORCE; + *origin = '\0'; be_root_concat(lbh, name, path); @@ -222,17 +223,21 @@ be_destroy(libbe_handle_t *lbh, const char *name, int options) return (set_error(lbh, BE_ERR_DESTROYACT)); fs = zfs_open(lbh->lzh, p, ZFS_TYPE_FILESYSTEM); + if (fs == NULL) + return (set_error(lbh, BE_ERR_ZFSOPEN)); + if ((options & BE_DESTROY_ORIGIN) != 0 && + zfs_prop_get(fs, ZFS_PROP_ORIGIN, origin, sizeof(origin), + NULL, NULL, 0, 1) != 0) + return (set_error(lbh, BE_ERR_NOORIGIN)); } else { - if (!zfs_dataset_exists(lbh->lzh, path, ZFS_TYPE_SNAPSHOT)) return (set_error(lbh, BE_ERR_NOENT)); fs = zfs_open(lbh->lzh, p, ZFS_TYPE_SNAPSHOT); + if (fs == NULL) + return (set_error(lbh, BE_ERR_ZFSOPEN)); } - if (fs == NULL) - return (set_error(lbh, BE_ERR_ZFSOPEN)); - /* Check if mounted, unmount if force is specified */ if ((mounted = zfs_is_mounted(fs, NULL)) != 0) { if (force) @@ -248,6 +253,17 @@ be_destroy(libbe_handle_t *lbh, const char *name, int options) return (set_error(lbh, BE_ERR_UNKNOWN)); } + if (*origin != '\0') { + fs = zfs_open(lbh->lzh, origin, ZFS_TYPE_SNAPSHOT); + if (fs == NULL) + return (set_error(lbh, BE_ERR_ZFSOPEN)); + err = zfs_destroy(fs, false); + if (err == EBUSY) + return (set_error(lbh, BE_ERR_DESTROYMNT)); + else if (err != 0) + return (set_error(lbh, BE_ERR_UNKNOWN)); + } + return (0); } diff --git a/lib/libbe/be.h b/lib/libbe/be.h index 265ce263cf5..dcf336d7423 
100644 --- a/lib/libbe/be.h +++ b/lib/libbe/be.h @@ -93,7 +93,8 @@ int be_rename(libbe_handle_t *, const char *, const char *); /* Bootenv removal functions */ typedef enum { - BE_DESTROY_FORCE = 1 << 0, + BE_DESTROY_FORCE = 1 << 0, + BE_DESTROY_ORIGIN = 1 << 1, } be_destroy_opt_t; int be_destroy(libbe_handle_t *, const char *, int); @@ -102,7 +103,7 @@ int be_destroy(libbe_handle_t *, const char *, int); typedef enum { BE_MNT_FORCE = 1 << 0, - BE_MNT_DEEP = 1 << 1, + BE_MNT_DEEP = 1 << 1, } be_mount_opt_t; int be_mount(libbe_handle_t *, char *, char *, int, char *); From 4c8fb952b525c16cc4e76d20556625a0bb36706a Mon Sep 17 00:00:00 2001 From: Kristof Provost Date: Sun, 10 Feb 2019 21:22:55 +0000 Subject: [PATCH 16/93] pfctl: Fix ifa_grouplookup() Setting the length of the request got lost in r343287, which means SIOCGIFGMEMB gives us the required length, but does not copy the names of the group members. As a result we don't get a correct list of group members, and 'set skip on ' broke. This produced all sorts of very unexpected results, because we would end up applying 'set skip' to unexpected interfaces. X-MFC-with: r343287 --- sbin/pfctl/pfctl_parser.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sbin/pfctl/pfctl_parser.c b/sbin/pfctl/pfctl_parser.c index c986cf15e49..770153c3922 100644 --- a/sbin/pfctl/pfctl_parser.c +++ b/sbin/pfctl/pfctl_parser.c @@ -1408,6 +1408,7 @@ ifa_grouplookup(char *ifa_name, int flags) return (NULL); bzero(&ifgr, sizeof(ifgr)); strlcpy(ifgr.ifgr_name, ifa_name, sizeof(ifgr.ifgr_name)); + ifgr.ifgr_len = len; if ((ifgr.ifgr_groups = calloc(1, len)) == NULL) err(1, "calloc"); if (ioctl(s, SIOCGIFGMEMB, (caddr_t)&ifgr) == -1) From 345c692d18c33c9b7706012c6ff214dc5128bbdc Mon Sep 17 00:00:00 2001 From: Marius Strobl Date: Sun, 10 Feb 2019 21:27:03 +0000 Subject: [PATCH 17/93] As struct cryptop is wrapped in #ifdef _KERNEL, userland doesn't need to drag in either. 
--- sys/opencrypto/cryptodev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/opencrypto/cryptodev.h b/sys/opencrypto/cryptodev.h index 6431e6d82e1..fe13539eca0 100644 --- a/sys/opencrypto/cryptodev.h +++ b/sys/opencrypto/cryptodev.h @@ -63,10 +63,10 @@ #define _CRYPTO_CRYPTO_H_ #include -#include #ifdef _KERNEL #include +#include #endif /* Some initial values */ From 2a0f9d54162cfb8dbf1cd4cc5dd59aaa9f98a08e Mon Sep 17 00:00:00 2001 From: Andriy Voskoboinyk Date: Sun, 10 Feb 2019 21:32:39 +0000 Subject: [PATCH 18/93] ifconfig(8): display 802.11n rates correctly for 'roam:rate' parameter MFC after: 5 days --- sbin/ifconfig/ifieee80211.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sbin/ifconfig/ifieee80211.c b/sbin/ifconfig/ifieee80211.c index c0649e33e78..dcb91e0441d 100644 --- a/sbin/ifconfig/ifieee80211.c +++ b/sbin/ifconfig/ifieee80211.c @@ -5080,7 +5080,9 @@ end: LINE_CHECK("roam:rssi %u.5", rp->rssi/2); else LINE_CHECK("roam:rssi %u", rp->rssi/2); - LINE_CHECK("roam:rate %u", rp->rate/2); + LINE_CHECK("roam:rate %s%u", + (rp->rate & IEEE80211_RATE_MCS) ? "MCS " : "", + get_rate_value(rp->rate)); } else { LINE_BREAK(); list_roam(s); From aac5464b6108116bc4172b47f54854f6f85c7a64 Mon Sep 17 00:00:00 2001 From: Jilles Tjoelker Date: Sun, 10 Feb 2019 22:23:05 +0000 Subject: [PATCH 19/93] sh: Restore $((x)) error checking after fix for $((-9223372036854775808)) SVN r342880 was designed to fix $((-9223372036854775808)) and things like $((0x8000000000000000)) but also broke error detection for values of variables without dollar sign ($((x))). For compatibility, overflow in plain literals continues to be ignored and the value is clamped to the boundary (except 9223372036854775808 which is changed to -9223372036854775808). 
Reviewed by: se (although he would like error checking to be removed) MFC after: 2 weeks X-MFC-with: r342880 Differential Revision: https://reviews.freebsd.org/D18926 --- bin/sh/arith_yacc.c | 2 +- bin/sh/arith_yacc.h | 1 + bin/sh/arith_yylex.c | 30 +++++++++++++++++++++++++++++- bin/sh/shell.h | 1 - bin/sh/tests/expansion/Makefile | 2 ++ bin/sh/tests/expansion/arith16.0 | 26 ++++++++++++++++++++++++++ bin/sh/tests/expansion/arith17.0 | 3 +++ 7 files changed, 62 insertions(+), 3 deletions(-) create mode 100644 bin/sh/tests/expansion/arith16.0 create mode 100644 bin/sh/tests/expansion/arith17.0 diff --git a/bin/sh/arith_yacc.c b/bin/sh/arith_yacc.c index 5000c6b0d1b..a08163bdc29 100644 --- a/bin/sh/arith_yacc.c +++ b/bin/sh/arith_yacc.c @@ -104,7 +104,7 @@ static arith_t arith_lookupvarint(char *varname) if (str == NULL || *str == '\0') str = "0"; errno = 0; - result = strtoarith_t(str, &p, 0); + result = strtoarith_t(str, &p); if (errno != 0 || *p != '\0') yyerror("variable conversion error"); return result; diff --git a/bin/sh/arith_yacc.h b/bin/sh/arith_yacc.h index ca92e6f7fa1..7fdd99b75b5 100644 --- a/bin/sh/arith_yacc.h +++ b/bin/sh/arith_yacc.h @@ -90,4 +90,5 @@ union yystype { extern union yystype yylval; +arith_t strtoarith_t(const char *restrict nptr, char **restrict endptr); int yylex(void); diff --git a/bin/sh/arith_yylex.c b/bin/sh/arith_yylex.c index f7eaf3e3fa6..9f320dbcb58 100644 --- a/bin/sh/arith_yylex.c +++ b/bin/sh/arith_yylex.c @@ -35,6 +35,8 @@ #include __FBSDID("$FreeBSD$"); +#include +#include #include #include #include @@ -50,6 +52,32 @@ __FBSDID("$FreeBSD$"); #error Arithmetic tokens are out of order. 
#endif +arith_t +strtoarith_t(const char *restrict nptr, char **restrict endptr) +{ + arith_t val; + + while (isspace((unsigned char)*nptr)) + nptr++; + switch (*nptr) { + case '-': + return strtoimax(nptr, endptr, 0); + case '0': + return (arith_t)strtoumax(nptr, endptr, 0); + default: + val = (arith_t)strtoumax(nptr, endptr, 0); + if (val >= 0) + return val; + else if (val == ARITH_MIN) { + errno = ERANGE; + return ARITH_MIN; + } else { + errno = ERANGE; + return ARITH_MAX; + } + } +} + int yylex(void) { @@ -78,7 +106,7 @@ yylex(void) case '7': case '8': case '9': - yylval.val = strtoarith_t(buf, &end, 0); + yylval.val = strtoarith_t(buf, &end); arith_buf = end; return ARITH_NUM; case 'A': diff --git a/bin/sh/shell.h b/bin/sh/shell.h index 9e2f9ab6e73..c06e737e658 100644 --- a/bin/sh/shell.h +++ b/bin/sh/shell.h @@ -59,7 +59,6 @@ */ typedef intmax_t arith_t; #define ARITH_FORMAT_STR "%" PRIdMAX -#define strtoarith_t(nptr, endptr, base) (intmax_t)strtoumax(nptr, endptr, base) #define ARITH_MIN INTMAX_MIN #define ARITH_MAX INTMAX_MAX diff --git a/bin/sh/tests/expansion/Makefile b/bin/sh/tests/expansion/Makefile index 25cf218f40a..58df967304f 100644 --- a/bin/sh/tests/expansion/Makefile +++ b/bin/sh/tests/expansion/Makefile @@ -22,6 +22,8 @@ ${PACKAGE}FILES+= arith12.0 ${PACKAGE}FILES+= arith13.0 ${PACKAGE}FILES+= arith14.0 ${PACKAGE}FILES+= arith15.0 +${PACKAGE}FILES+= arith16.0 +${PACKAGE}FILES+= arith17.0 ${PACKAGE}FILES+= assign1.0 ${PACKAGE}FILES+= cmdsubst1.0 ${PACKAGE}FILES+= cmdsubst2.0 diff --git a/bin/sh/tests/expansion/arith16.0 b/bin/sh/tests/expansion/arith16.0 new file mode 100644 index 00000000000..b764e3c216f --- /dev/null +++ b/bin/sh/tests/expansion/arith16.0 @@ -0,0 +1,26 @@ +# $FreeBSD$ + +failures=0 + +for x in \ + 0x10000000000000000 \ + -0x8000000000000001 \ + 0xfffffffffffffffffffffffffffffffff \ + -0xfffffffffffffffffffffffffffffffff \ + 02000000000000000000000 \ + 9223372036854775808 \ + 9223372036854775809 \ + -9223372036854775809 \ + 
9999999999999999999999999 \ + -9999999999999999999999999 +do + msg=$({ + v=$((x)) || : + } 3>&1 >&2 2>&3 3>&-) + r=$? + if [ "$r" = 0 ] || [ -z "$msg" ]; then + printf 'Failed: %s\n' "$x" + : $((failures += 1)) + fi +done +exit $((failures > 0)) diff --git a/bin/sh/tests/expansion/arith17.0 b/bin/sh/tests/expansion/arith17.0 new file mode 100644 index 00000000000..0a9260886dd --- /dev/null +++ b/bin/sh/tests/expansion/arith17.0 @@ -0,0 +1,3 @@ +# $FreeBSD$ + +[ $((9223372036854775809)) -gt 0 ] From e0d164c7a66426c92dfb5a08a9803e323b8f90b9 Mon Sep 17 00:00:00 2001 From: Conrad Meyer Date: Sun, 10 Feb 2019 23:07:46 +0000 Subject: [PATCH 20/93] Prevent overflow for usertime/systime in calcru1 PR: 76972 and duplicates Reported by: Dr. Christopher Landauer , Steinar Haug Submitted by: Andrey Zonov (earlier version) MFC after: 2 weeks --- sys/kern/kern_resource.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/sys/kern/kern_resource.c b/sys/kern/kern_resource.c index 908f2a6c067..a4de84c311c 100644 --- a/sys/kern/kern_resource.c +++ b/sys/kern/kern_resource.c @@ -863,6 +863,15 @@ rufetchtd(struct thread *td, struct rusage *ru) calcru1(p, &td->td_rux, &ru->ru_utime, &ru->ru_stime); } +static uint64_t +mul64_by_fraction(uint64_t a, uint64_t b, uint64_t c) +{ + /* + * Compute floor(a * (b / c)) without overflowing, (b / c) <= 1.0. + */ + return ((a / c) * b + (a % c) * (b / c) + (a % c) * (b % c) / c); +} + static void calcru1(struct proc *p, struct rusage_ext *ruxp, struct timeval *up, struct timeval *sp) @@ -892,10 +901,10 @@ calcru1(struct proc *p, struct rusage_ext *ruxp, struct timeval *up, * The normal case, time increased. * Enforce monotonicity of bucketed numbers.
*/ - uu = (tu * ut) / tt; + uu = mul64_by_fraction(tu, ut, tt); if (uu < ruxp->rux_uu) uu = ruxp->rux_uu; - su = (tu * st) / tt; + su = mul64_by_fraction(tu, st, tt); if (su < ruxp->rux_su) su = ruxp->rux_su; } else if (tu + 3 > ruxp->rux_tu || 101 * tu > 100 * ruxp->rux_tu) { @@ -924,8 +933,8 @@ calcru1(struct proc *p, struct rusage_ext *ruxp, struct timeval *up, "to %ju usec for pid %d (%s)\n", (uintmax_t)ruxp->rux_tu, (uintmax_t)tu, p->p_pid, p->p_comm); - uu = (tu * ut) / tt; - su = (tu * st) / tt; + uu = mul64_by_fraction(tu, ut, tt); + su = mul64_by_fraction(tu, st, tt); } ruxp->rux_uu = uu; From b5d787d93b3d83f28e87e1f8cc740cb160f8f0ac Mon Sep 17 00:00:00 2001 From: Mariusz Zaborski Date: Sun, 10 Feb 2019 23:28:55 +0000 Subject: [PATCH 21/93] libnv: fix memory leaks nvpair_create_stringv: free the temporary string; this fix affects nvlist_add_stringf() and nvlist_add_stringv(). nvpair_remove_nvlist_array (NV_TYPE_NVLIST_ARRAY case): free the chain of nvpairs (as resetting it prevents nvlist_destroy() from freeing it). Note: freeing the chain in nvlist_destroy() is not sufficient, because it would still leak through nvlist_take_nvlist_array(). 
This affects all nvlist_*_nvlist_array() use Submitted by: Mindaugas Rasiukevicius Reported by: clang/gcc ASAN MFC after: 2 weeks --- lib/libnv/tests/nvlist_send_recv_test.c | 2 ++ sys/contrib/libnv/nv_impl.h | 1 + sys/contrib/libnv/nvlist.c | 9 +++++++++ sys/contrib/libnv/nvpair.c | 15 +++++++++++---- 4 files changed, 23 insertions(+), 4 deletions(-) diff --git a/lib/libnv/tests/nvlist_send_recv_test.c b/lib/libnv/tests/nvlist_send_recv_test.c index 1b673b5d425..fbc918102b5 100644 --- a/lib/libnv/tests/nvlist_send_recv_test.c +++ b/lib/libnv/tests/nvlist_send_recv_test.c @@ -304,6 +304,8 @@ parent(int sock) name = nvlist_next(nvl, &type, &cookie); CHECK(name == NULL); + + nvlist_destroy(nvl); } static void diff --git a/sys/contrib/libnv/nv_impl.h b/sys/contrib/libnv/nv_impl.h index 56e01a2bfb3..14f2fb8f2c1 100644 --- a/sys/contrib/libnv/nv_impl.h +++ b/sys/contrib/libnv/nv_impl.h @@ -103,6 +103,7 @@ bool nvlist_move_nvpair(nvlist_t *nvl, nvpair_t *nvp); void nvlist_set_parent(nvlist_t *nvl, nvpair_t *parent); void nvlist_set_array_next(nvlist_t *nvl, nvpair_t *ele); +nvpair_t *nvlist_get_array_next_nvpair(nvlist_t *nvl); const nvpair_t *nvlist_get_nvpair(const nvlist_t *nvl, const char *name); diff --git a/sys/contrib/libnv/nvlist.c b/sys/contrib/libnv/nvlist.c index 0101d8c9e8c..15fefc91d1e 100644 --- a/sys/contrib/libnv/nvlist.c +++ b/sys/contrib/libnv/nvlist.c @@ -247,6 +247,15 @@ nvlist_set_array_next(nvlist_t *nvl, nvpair_t *ele) nvl->nvl_array_next = ele; } +nvpair_t * +nvlist_get_array_next_nvpair(nvlist_t *nvl) +{ + + NVLIST_ASSERT(nvl); + + return (nvl->nvl_array_next); +} + bool nvlist_in_array(const nvlist_t *nvl) { diff --git a/sys/contrib/libnv/nvpair.c b/sys/contrib/libnv/nvpair.c index 23038c5c5b7..134b68c93ac 100644 --- a/sys/contrib/libnv/nvpair.c +++ b/sys/contrib/libnv/nvpair.c @@ -229,8 +229,16 @@ nvpair_remove_nvlist_array(nvpair_t *nvp) nvlarray = __DECONST(nvlist_t **, nvpair_get_nvlist_array(nvp, &count)); for (i = 0; i < count; i++) { - 
nvlist_set_array_next(nvlarray[i], NULL); - nvlist_set_parent(nvlarray[i], NULL); + nvlist_t *nvl; + nvpair_t *nnvp; + + nvl = nvlarray[i]; + nnvp = nvlist_get_array_next_nvpair(nvl); + if (nnvp != NULL) { + nvpair_free_structure(nnvp); + } + nvlist_set_array_next(nvl, NULL); + nvlist_set_parent(nvl, NULL); } } @@ -1193,8 +1201,7 @@ nvpair_create_stringv(const char *name, const char *valuefmt, va_list valueap) if (len < 0) return (NULL); nvp = nvpair_create_string(name, str); - if (nvp == NULL) - nv_free(str); + nv_free(str); return (nvp); } From 0020c845a086766b3315372f006363f8ad76ac54 Mon Sep 17 00:00:00 2001 From: Mariusz Zaborski Date: Sun, 10 Feb 2019 23:30:54 +0000 Subject: [PATCH 22/93] libnv: fix memory leaks Free the data array for NV_TYPE_DESCRIPTOR_ARRAY case. MFC after: 2 weeks --- sys/contrib/libnv/nvpair.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sys/contrib/libnv/nvpair.c b/sys/contrib/libnv/nvpair.c index 134b68c93ac..d45ac34f59c 100644 --- a/sys/contrib/libnv/nvpair.c +++ b/sys/contrib/libnv/nvpair.c @@ -2061,6 +2061,7 @@ nvpair_free(nvpair_t *nvp) case NV_TYPE_DESCRIPTOR_ARRAY: for (i = 0; i < nvp->nvp_nitems; i++) close(((int *)(intptr_t)nvp->nvp_data)[i]); + nv_free((int *)(intptr_t)nvp->nvp_data); break; #endif case NV_TYPE_NVLIST: From f3f08e16a3a8a661dc6b603f7045bc0be18c28e5 Mon Sep 17 00:00:00 2001 From: Andriy Voskoboinyk Date: Sun, 10 Feb 2019 23:58:56 +0000 Subject: [PATCH 23/93] net80211(4): hide casts for 'i_seq' field offset calculation inside ieee80211_getqos() and reuse it in various places. Checked with RTL8188EE, HOSTAP mode + RTL8188CUS, STA mode. 
MFC after: 2 weeks --- sys/dev/malo/if_malo.c | 20 ++++---------------- sys/dev/mwl/if_mwl.c | 20 ++++---------------- sys/dev/usb/wlan/if_run.c | 6 +----- sys/net80211/ieee80211_adhoc.c | 8 +++----- sys/net80211/ieee80211_hostap.c | 8 +++----- sys/net80211/ieee80211_ht.c | 5 +---- sys/net80211/ieee80211_mesh.c | 7 +------ sys/net80211/ieee80211_output.c | 10 ++-------- sys/net80211/ieee80211_proto.h | 16 ++++++++++++++++ sys/net80211/ieee80211_sta.c | 8 +++----- sys/net80211/ieee80211_wds.c | 8 +++----- 11 files changed, 41 insertions(+), 75 deletions(-) diff --git a/sys/dev/malo/if_malo.c b/sys/dev/malo/if_malo.c index 25f9ee33e74..ce63c5322ab 100644 --- a/sys/dev/malo/if_malo.c +++ b/sys/dev/malo/if_malo.c @@ -1051,13 +1051,9 @@ malo_tx_start(struct malo_softc *sc, struct ieee80211_node *ni, copyhdrlen = hdrlen = ieee80211_anyhdrsize(wh); pktlen = m0->m_pkthdr.len; if (IEEE80211_QOS_HAS_SEQ(wh)) { - if (IEEE80211_IS_DSTODS(wh)) { - qos = *(uint16_t *) - (((struct ieee80211_qosframe_addr4 *) wh)->i_qos); + qos = *(uint16_t *)ieee80211_getqos(wh); + if (IEEE80211_IS_DSTODS(wh)) copyhdrlen -= sizeof(qos); - } else - qos = *(uint16_t *) - (((struct ieee80211_qosframe *) wh)->i_qos); } else qos = 0; @@ -1952,7 +1948,6 @@ malo_rx_proc(void *arg, int npending) struct malo_rxdesc *ds; struct mbuf *m, *mnew; struct ieee80211_qosframe *wh; - struct ieee80211_qosframe_addr4 *wh4; struct ieee80211_node *ni; int off, len, hdrlen, pktlen, rssi, ntodo; uint8_t *data, status; @@ -2062,15 +2057,8 @@ malo_rx_proc(void *arg, int npending) /* NB: don't need to do this sometimes but ... */ /* XXX special case so we can memcpy after m_devget? 
*/ ovbcopy(data + sizeof(uint16_t), wh, hdrlen); - if (IEEE80211_QOS_HAS_SEQ(wh)) { - if (IEEE80211_IS_DSTODS(wh)) { - wh4 = mtod(m, - struct ieee80211_qosframe_addr4*); - *(uint16_t *)wh4->i_qos = ds->qosctrl; - } else { - *(uint16_t *)wh->i_qos = ds->qosctrl; - } - } + if (IEEE80211_QOS_HAS_SEQ(wh)) + *(uint16_t *)ieee80211_getqos(wh) = ds->qosctrl; if (ieee80211_radiotap_active(ic)) { sc->malo_rx_th.wr_flags = 0; sc->malo_rx_th.wr_rate = ds->rate; diff --git a/sys/dev/mwl/if_mwl.c b/sys/dev/mwl/if_mwl.c index b54dc7e8cb5..a33e4932496 100644 --- a/sys/dev/mwl/if_mwl.c +++ b/sys/dev/mwl/if_mwl.c @@ -2614,7 +2614,6 @@ mwl_rx_proc(void *arg, int npending) struct mwl_rxdesc *ds; struct mbuf *m; struct ieee80211_qosframe *wh; - struct ieee80211_qosframe_addr4 *wh4; struct ieee80211_node *ni; struct mwl_node *mn; int off, len, hdrlen, pktlen, rssi, ntodo; @@ -2761,15 +2760,8 @@ mwl_rx_proc(void *arg, int npending) /* NB: don't need to do this sometimes but ... */ /* XXX special case so we can memcpy after m_devget? 
*/ ovbcopy(data + sizeof(uint16_t), wh, hdrlen); - if (IEEE80211_QOS_HAS_SEQ(wh)) { - if (IEEE80211_IS_DSTODS(wh)) { - wh4 = mtod(m, - struct ieee80211_qosframe_addr4*); - *(uint16_t *)wh4->i_qos = ds->QosCtrl; - } else { - *(uint16_t *)wh->i_qos = ds->QosCtrl; - } - } + if (IEEE80211_QOS_HAS_SEQ(wh)) + *(uint16_t *)ieee80211_getqos(wh) = ds->QosCtrl; /* * The f/w strips WEP header but doesn't clear * the WEP bit; mark the packet with M_WEP so @@ -3100,13 +3092,9 @@ mwl_tx_start(struct mwl_softc *sc, struct ieee80211_node *ni, struct mwl_txbuf * copyhdrlen = hdrlen; pktlen = m0->m_pkthdr.len; if (IEEE80211_QOS_HAS_SEQ(wh)) { - if (IEEE80211_IS_DSTODS(wh)) { - qos = *(uint16_t *) - (((struct ieee80211_qosframe_addr4 *) wh)->i_qos); + qos = *(uint16_t *)ieee80211_getqos(wh); + if (IEEE80211_IS_DSTODS(wh)) copyhdrlen -= sizeof(qos); - } else - qos = *(uint16_t *) - (((struct ieee80211_qosframe *) wh)->i_qos); } else qos = 0; diff --git a/sys/dev/usb/wlan/if_run.c b/sys/dev/usb/wlan/if_run.c index 658a3e65f86..b64a994a276 100644 --- a/sys/dev/usb/wlan/if_run.c +++ b/sys/dev/usb/wlan/if_run.c @@ -3369,11 +3369,7 @@ run_tx(struct run_softc *sc, struct mbuf *m, struct ieee80211_node *ni) if ((hasqos = IEEE80211_QOS_HAS_SEQ(wh))) { uint8_t *frm; - if(IEEE80211_HAS_ADDR4(wh)) - frm = ((struct ieee80211_qosframe_addr4 *)wh)->i_qos; - else - frm =((struct ieee80211_qosframe *)wh)->i_qos; - + frm = ieee80211_getqos(wh); qos = le16toh(*(const uint16_t *)frm); tid = qos & IEEE80211_QOS_TID; qid = TID_TO_WME_AC(tid); diff --git a/sys/net80211/ieee80211_adhoc.c b/sys/net80211/ieee80211_adhoc.c index 457db315387..24f6ba448e7 100644 --- a/sys/net80211/ieee80211_adhoc.c +++ b/sys/net80211/ieee80211_adhoc.c @@ -522,11 +522,9 @@ adhoc_input(struct ieee80211_node *ni, struct mbuf *m, /* * Save QoS bits for use below--before we strip the header. */ - if (subtype == IEEE80211_FC0_SUBTYPE_QOS) { - qos = (dir == IEEE80211_FC1_DIR_DSTODS) ? 
- ((struct ieee80211_qosframe_addr4 *)wh)->i_qos[0] : - ((struct ieee80211_qosframe *)wh)->i_qos[0]; - } else + if (subtype == IEEE80211_FC0_SUBTYPE_QOS) + qos = ieee80211_getqos(wh)[0]; + else qos = 0; /* diff --git a/sys/net80211/ieee80211_hostap.c b/sys/net80211/ieee80211_hostap.c index 70b68d70e8b..263e0c3a2b6 100644 --- a/sys/net80211/ieee80211_hostap.c +++ b/sys/net80211/ieee80211_hostap.c @@ -708,11 +708,9 @@ hostap_input(struct ieee80211_node *ni, struct mbuf *m, /* * Save QoS bits for use below--before we strip the header. */ - if (subtype == IEEE80211_FC0_SUBTYPE_QOS) { - qos = (dir == IEEE80211_FC1_DIR_DSTODS) ? - ((struct ieee80211_qosframe_addr4 *)wh)->i_qos[0] : - ((struct ieee80211_qosframe *)wh)->i_qos[0]; - } else + if (subtype == IEEE80211_FC0_SUBTYPE_QOS) + qos = ieee80211_getqos(wh)[0]; + else qos = 0; /* diff --git a/sys/net80211/ieee80211_ht.c b/sys/net80211/ieee80211_ht.c index 7c742aa9e45..a2bfeda3358 100644 --- a/sys/net80211/ieee80211_ht.c +++ b/sys/net80211/ieee80211_ht.c @@ -886,10 +886,7 @@ ieee80211_ampdu_reorder(struct ieee80211_node *ni, struct mbuf *m, if (IEEE80211_IS_MULTICAST(wh->i_addr1)) return PROCESS; - if (IEEE80211_IS_DSTODS(wh)) - tid = ((struct ieee80211_qosframe_addr4 *)wh)->i_qos[0]; - else - tid = wh->i_qos[0]; + tid = ieee80211_getqos(wh)[0]; tid &= IEEE80211_QOS_TID; rap = &ni->ni_rx_ampdu[tid]; if ((rap->rxa_flags & IEEE80211_AGGR_XCHGPEND) == 0) { diff --git a/sys/net80211/ieee80211_mesh.c b/sys/net80211/ieee80211_mesh.c index b753b45651d..7121b012569 100644 --- a/sys/net80211/ieee80211_mesh.c +++ b/sys/net80211/ieee80211_mesh.c @@ -1655,12 +1655,7 @@ mesh_input(struct ieee80211_node *ni, struct mbuf *m, * in the Mesh Control field and a 3 address qos frame * is used. 
*/ - if (IEEE80211_IS_DSTODS(wh)) - *(uint16_t *)qos = *(uint16_t *) - ((struct ieee80211_qosframe_addr4 *)wh)->i_qos; - else - *(uint16_t *)qos = *(uint16_t *) - ((struct ieee80211_qosframe *)wh)->i_qos; + *(uint16_t *)qos = *(uint16_t *)ieee80211_getqos(wh); /* * NB: The mesh STA sets the Mesh Control Present diff --git a/sys/net80211/ieee80211_output.c b/sys/net80211/ieee80211_output.c index 655d4b73b9d..6dc4739d769 100644 --- a/sys/net80211/ieee80211_output.c +++ b/sys/net80211/ieee80211_output.c @@ -1948,14 +1948,8 @@ ieee80211_fragment(struct ieee80211vap *vap, struct mbuf *m0, whf = mtod(m, struct ieee80211_frame *); memcpy(whf, wh, hdrsize); #ifdef IEEE80211_SUPPORT_MESH - if (vap->iv_opmode == IEEE80211_M_MBSS) { - if (IEEE80211_IS_DSTODS(wh)) - ((struct ieee80211_qosframe_addr4 *) - whf)->i_qos[1] &= ~IEEE80211_QOS_MC; - else - ((struct ieee80211_qosframe *) - whf)->i_qos[1] &= ~IEEE80211_QOS_MC; - } + if (vap->iv_opmode == IEEE80211_M_MBSS) + ieee80211_getqos(wh)[1] &= ~IEEE80211_QOS_MC; #endif *(uint16_t *)&whf->i_seq[0] |= htole16( (fragno & IEEE80211_SEQ_FRAG_MASK) << diff --git a/sys/net80211/ieee80211_proto.h b/sys/net80211/ieee80211_proto.h index c1637c5740f..717de30d00f 100644 --- a/sys/net80211/ieee80211_proto.h +++ b/sys/net80211/ieee80211_proto.h @@ -302,6 +302,22 @@ void ieee80211_wme_ic_getparams(struct ieee80211com *ic, struct chanAccParams *); int ieee80211_wme_vap_ac_is_noack(struct ieee80211vap *vap, int ac); +/* + * Return pointer to the QoS field from a Qos frame. + */ +static __inline uint8_t * +ieee80211_getqos(void *data) +{ + struct ieee80211_frame *wh = data; + + KASSERT(IEEE80211_QOS_HAS_SEQ(wh), ("QoS field is absent!")); + + if (IEEE80211_IS_DSTODS(wh)) + return (((struct ieee80211_qosframe_addr4 *)wh)->i_qos); + else + return (((struct ieee80211_qosframe *)wh)->i_qos); +} + /* * Return the WME TID from a QoS frame. If no TID * is present return the index for the "non-QoS" entry. 
diff --git a/sys/net80211/ieee80211_sta.c b/sys/net80211/ieee80211_sta.c index 45008f59520..46ba73a9ecd 100644 --- a/sys/net80211/ieee80211_sta.c +++ b/sys/net80211/ieee80211_sta.c @@ -786,11 +786,9 @@ sta_input(struct ieee80211_node *ni, struct mbuf *m, /* * Save QoS bits for use below--before we strip the header. */ - if (subtype == IEEE80211_FC0_SUBTYPE_QOS) { - qos = (dir == IEEE80211_FC1_DIR_DSTODS) ? - ((struct ieee80211_qosframe_addr4 *)wh)->i_qos[0] : - ((struct ieee80211_qosframe *)wh)->i_qos[0]; - } else + if (subtype == IEEE80211_FC0_SUBTYPE_QOS) + qos = ieee80211_getqos(wh)[0]; + else qos = 0; /* diff --git a/sys/net80211/ieee80211_wds.c b/sys/net80211/ieee80211_wds.c index 9bbad6a2764..ea13572cf2d 100644 --- a/sys/net80211/ieee80211_wds.c +++ b/sys/net80211/ieee80211_wds.c @@ -583,11 +583,9 @@ wds_input(struct ieee80211_node *ni, struct mbuf *m, /* * Save QoS bits for use below--before we strip the header. */ - if (subtype == IEEE80211_FC0_SUBTYPE_QOS) { - qos = (dir == IEEE80211_FC1_DIR_DSTODS) ? 
- ((struct ieee80211_qosframe_addr4 *)wh)->i_qos[0] : - ((struct ieee80211_qosframe *)wh)->i_qos[0]; - } else + if (subtype == IEEE80211_FC0_SUBTYPE_QOS) + qos = ieee80211_getqos(wh)[0]; + else qos = 0; /* From 39f37df26e1c6aabf7d47dcdc79fc780e43b4039 Mon Sep 17 00:00:00 2001 From: Conrad Meyer Date: Mon, 11 Feb 2019 00:11:02 +0000 Subject: [PATCH 24/93] gbde(8) - simplify randomisation with arc4random_buf Submitted by: David CARLIER Differential Revision: https://reviews.freebsd.org/D18678 --- sbin/gbde/gbde.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/sbin/gbde/gbde.c b/sbin/gbde/gbde.c index 16afe5e5a48..421242c38c8 100644 --- a/sbin/gbde/gbde.c +++ b/sbin/gbde/gbde.c @@ -174,18 +174,7 @@ g_read_data(struct g_consumer *cp, off_t offset, off_t length, int *error) static void random_bits(void *p, u_int len) { - static int fdr = -1; - int i; - - if (fdr < 0) { - fdr = open("/dev/urandom", O_RDONLY); - if (fdr < 0) - err(1, "/dev/urandom"); - } - - i = read(fdr, p, len); - if (i != (int)len) - err(1, "read from /dev/urandom"); + arc4random_buf(p, len); } /* XXX: not nice */ From 77b4126ce6f389b135bbad48574923a1264e31fa Mon Sep 17 00:00:00 2001 From: Kyle Evans Date: Mon, 11 Feb 2019 04:00:01 +0000 Subject: [PATCH 25/93] bectl(8): Add -o flag to destroy to clean up the origin snapshot of BE We can't predict when destruction of origin is needed, and currently we have a precedent for not prompting for things. Leave the decision up to the user of bectl(8) if they want the origin snapshot to be destroyed or not. Emits a warning when -o isn't used and an origin snapshot is left to be cleaned up, for the time being. This is handy when one drops the -o flag but really did want to clean up the origin. A couple of -e ignore's have been sprinkled around the test suite for places that we don't care that the origin's not been cleaned up. 
-o functionality tests will be added in the future, but are omitted for now to reduce conflicts with work in flight to fix bits of the tests. Reported by: Shawn Webb MFC after: 1 week --- sbin/bectl/bectl.8 | 14 +++++++++++--- sbin/bectl/bectl.c | 34 +++++++++++++++++++++++++++------- 2 files changed, 38 insertions(+), 10 deletions(-) diff --git a/sbin/bectl/bectl.8 b/sbin/bectl/bectl.8 index daff7306b0d..fce3d606b36 100644 --- a/sbin/bectl/bectl.8 +++ b/sbin/bectl/bectl.8 @@ -18,7 +18,7 @@ .\" .\" $FreeBSD$ .\" -.Dd December 25, 2018 +.Dd February 10, 2019 .Dt BECTL 8 .Os .Sh NAME @@ -40,7 +40,7 @@ .Ar beName@snapshot .Nm .Cm destroy -.Op Fl F +.Op Fl \&Fo .Brq Ar beName | beName@snapshot .Nm .Cm export @@ -124,7 +124,7 @@ If the flag is given, a recursive boot environment will be made. .It Xo .Cm destroy -.Op Fl F +.Op Fl \&Fo .Brq Ar beName | beName@snapshot .Xc Destroys the given @@ -136,6 +136,14 @@ snapshot without confirmation, unlike in Specifying .Fl F will automatically unmount without confirmation. +.Pp +By default, +.Nm +will warn that it is not destroying the origin of +.Ar beName . +The +.Fl o +flag may be specified to destroy the origin as well. 
.It Cm export Ar sourceBe Export .Ar sourceBe diff --git a/sbin/bectl/bectl.c b/sbin/bectl/bectl.c index 06e64cdda9d..366fa048a89 100644 --- a/sbin/bectl/bectl.c +++ b/sbin/bectl/bectl.c @@ -341,15 +341,18 @@ bectl_cmd_add(int argc, char *argv[]) static int bectl_cmd_destroy(int argc, char *argv[]) { - char *target; - int opt, err; - bool force; + nvlist_t *props; + char *origin, *target, targetds[BE_MAXPATHLEN]; + int err, flags, opt; - force = false; - while ((opt = getopt(argc, argv, "F")) != -1) { + flags = 0; + while ((opt = getopt(argc, argv, "Fo")) != -1) { switch (opt) { case 'F': - force = true; + flags |= BE_DESTROY_FORCE; + break; + case 'o': + flags |= BE_DESTROY_ORIGIN; break; default: fprintf(stderr, "bectl destroy: unknown option '-%c'\n", @@ -368,7 +371,24 @@ bectl_cmd_destroy(int argc, char *argv[]) target = argv[0]; - err = be_destroy(be, target, force); + /* We'll emit a notice if there's an origin to be cleaned up */ + if ((flags & BE_DESTROY_ORIGIN) == 0 && strchr(target, '@') == NULL) { + if (be_root_concat(be, target, targetds) != 0) + goto destroy; + if (be_prop_list_alloc(&props) != 0) + goto destroy; + if (be_get_dataset_props(be, targetds, props) != 0) { + be_prop_list_free(props); + goto destroy; + } + if (nvlist_lookup_string(props, "origin", &origin) == 0) + fprintf(stderr, "bectl destroy: leaving origin '%s' intact\n", + origin); + be_prop_list_free(props); + } + +destroy: + err = be_destroy(be, target, flags); return (err); } From 6286a6438e32951be6d5f05291332e14ecefd5c2 Mon Sep 17 00:00:00 2001 From: Kyle Evans Date: Mon, 11 Feb 2019 04:00:42 +0000 Subject: [PATCH 26/93] bectl(8): commit missing test modifications from r343993 X-MFC-With: r343993 --- sbin/bectl/tests/bectl_test.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sbin/bectl/tests/bectl_test.sh b/sbin/bectl/tests/bectl_test.sh index 76f22655d44..cc73673d6eb 100755 --- a/sbin/bectl/tests/bectl_test.sh +++ b/sbin/bectl/tests/bectl_test.sh @@ 
-110,7 +110,7 @@ bectl_destroy_body() bectl_create_setup ${zpool} ${disk} ${mount} atf_check bectl -r ${zpool}/ROOT create -e default default2 atf_check -o not-empty zfs get mountpoint ${zpool}/ROOT/default2 - atf_check bectl -r ${zpool}/ROOT destroy default2 + atf_check -e ignore bectl -r ${zpool}/ROOT destroy default2 atf_check -e not-empty -s not-exit:0 zfs get mountpoint ${zpool}/ROOT/default2 } bectl_destroy_cleanup() @@ -137,7 +137,7 @@ bectl_export_import_body() atf_check -o save:exported bectl -r ${zpool}/ROOT export default atf_check -x "bectl -r ${zpool}/ROOT import default2 < exported" atf_check -o not-empty zfs get mountpoint ${zpool}/ROOT/default2 - atf_check bectl -r ${zpool}/ROOT destroy default2 + atf_check -e ignore bectl -r ${zpool}/ROOT destroy default2 atf_check -e not-empty -s not-exit:0 zfs get mountpoint \ ${zpool}/ROOT/default2 } @@ -171,7 +171,7 @@ bectl_list_body() atf_check bectl -r ${zpool}/ROOT create -e default default2 atf_check -o save:list.out bectl -r ${zpool}/ROOT list atf_check -o not-empty grep 'default2' list.out - atf_check bectl -r ${zpool}/ROOT destroy default2 + atf_check -e ignore bectl -r ${zpool}/ROOT destroy default2 atf_check -o save:list.out bectl -r ${zpool}/ROOT list atf_check -s not-exit:0 grep 'default2' list.out # XXX TODO: Formatting checks From 8f2ac656906a7d498bd6784a09ceeed9f953e2ff Mon Sep 17 00:00:00 2001 From: Patrick Kelsey Date: Mon, 11 Feb 2019 05:17:31 +0000 Subject: [PATCH 27/93] Reduce the time it takes the kernel to install a new PF config containing a large number of queues In general, the time savings come from separating the active and inactive queue lists into separate interface and non-interface queue lists, and changing the rule and queue tag management from list-based to hash-based. In HFSC, a linear scan of the class table during each queue destroy was also eliminated.
There are now two new tunables to control the hash size used for each tag set (default for each is 128): net.pf.queue_tag_hashsize net.pf.rule_tag_hashsize Reviewed by: kp MFC after: 1 week Sponsored by: RG Nets Differential Revision: https://reviews.freebsd.org/D19131 --- sys/net/altq/altq_cbq.c | 5 +- sys/net/altq/altq_codel.c | 5 +- sys/net/altq/altq_fairq.c | 5 +- sys/net/altq/altq_hfsc.c | 15 +- sys/net/altq/altq_hfsc.h | 1 + sys/net/altq/altq_priq.c | 5 +- sys/net/altq/altq_subr.c | 12 +- sys/net/altq/altq_var.h | 12 +- sys/net/pfvar.h | 10 +- sys/netpfil/pf/pf.c | 12 +- sys/netpfil/pf/pf_ioctl.c | 410 ++++++++++++++++++++++++++------------ 11 files changed, 332 insertions(+), 160 deletions(-) diff --git a/sys/net/altq/altq_cbq.c b/sys/net/altq/altq_cbq.c index aa646848dc3..fd05955bb96 100644 --- a/sys/net/altq/altq_cbq.c +++ b/sys/net/altq/altq_cbq.c @@ -223,12 +223,11 @@ cbq_pfattach(struct pf_altq *a) } int -cbq_add_altq(struct pf_altq *a) +cbq_add_altq(struct ifnet *ifp, struct pf_altq *a) { cbq_state_t *cbqp; - struct ifnet *ifp; - if ((ifp = ifunit(a->ifname)) == NULL) + if (ifp == NULL) return (EINVAL); if (!ALTQ_IS_READY(&ifp->if_snd)) return (ENODEV); diff --git a/sys/net/altq/altq_codel.c b/sys/net/altq/altq_codel.c index ee3ea82b7ab..a98553aa52f 100644 --- a/sys/net/altq/altq_codel.c +++ b/sys/net/altq/altq_codel.c @@ -89,13 +89,12 @@ codel_pfattach(struct pf_altq *a) } int -codel_add_altq(struct pf_altq *a) +codel_add_altq(struct ifnet *ifp, struct pf_altq *a) { struct codel_if *cif; - struct ifnet *ifp; struct codel_opts *opts; - if ((ifp = ifunit(a->ifname)) == NULL) + if (ifp == NULL) return (EINVAL); if (!ALTQ_IS_READY(&ifp->if_snd)) return (ENODEV); diff --git a/sys/net/altq/altq_fairq.c b/sys/net/altq/altq_fairq.c index b695a5133df..c6ccea2b5de 100644 --- a/sys/net/altq/altq_fairq.c +++ b/sys/net/altq/altq_fairq.c @@ -148,12 +148,11 @@ fairq_pfattach(struct pf_altq *a) } int -fairq_add_altq(struct pf_altq *a) +fairq_add_altq(struct ifnet 
*ifp, struct pf_altq *a) { struct fairq_if *pif; - struct ifnet *ifp; - if ((ifp = ifunit(a->ifname)) == NULL) + if (ifp == NULL) return (EINVAL); if (!ALTQ_IS_READY(&ifp->if_snd)) return (ENODEV); diff --git a/sys/net/altq/altq_hfsc.c b/sys/net/altq/altq_hfsc.c index 1405849c0fa..5a5f56fcd2f 100644 --- a/sys/net/altq/altq_hfsc.c +++ b/sys/net/altq/altq_hfsc.c @@ -159,12 +159,11 @@ hfsc_pfattach(struct pf_altq *a) } int -hfsc_add_altq(struct pf_altq *a) +hfsc_add_altq(struct ifnet *ifp, struct pf_altq *a) { struct hfsc_if *hif; - struct ifnet *ifp; - if ((ifp = ifunit(a->ifname)) == NULL) + if (ifp == NULL) return (EINVAL); if (!ALTQ_IS_READY(&ifp->if_snd)) return (ENODEV); @@ -506,6 +505,7 @@ hfsc_class_create(struct hfsc_if *hif, struct service_curve *rsc, goto err_ret; } } + cl->cl_slot = i; if (flags & HFCF_DEFAULTCLASS) hif->hif_defaultclass = cl; @@ -558,7 +558,7 @@ hfsc_class_create(struct hfsc_if *hif, struct service_curve *rsc, static int hfsc_class_destroy(struct hfsc_class *cl) { - int i, s; + int s; if (cl == NULL) return (0); @@ -589,12 +589,7 @@ hfsc_class_destroy(struct hfsc_class *cl) ASSERT(p != NULL); } - for (i = 0; i < HFSC_MAX_CLASSES; i++) - if (cl->cl_hif->hif_class_tbl[i] == cl) { - cl->cl_hif->hif_class_tbl[i] = NULL; - break; - } - + cl->cl_hif->hif_class_tbl[cl->cl_slot] = NULL; cl->cl_hif->hif_classes--; IFQ_UNLOCK(cl->cl_hif->hif_ifq); splx(s); diff --git a/sys/net/altq/altq_hfsc.h b/sys/net/altq/altq_hfsc.h index fa4aa81134b..c43c6671ca5 100644 --- a/sys/net/altq/altq_hfsc.h +++ b/sys/net/altq/altq_hfsc.h @@ -214,6 +214,7 @@ struct runtime_sc { struct hfsc_class { u_int cl_id; /* class id (just for debug) */ + u_int cl_slot; /* slot in hif class table */ u_int32_t cl_handle; /* class handle */ struct hfsc_if *cl_hif; /* back pointer to struct hfsc_if */ int cl_flags; /* misc flags */ diff --git a/sys/net/altq/altq_priq.c b/sys/net/altq/altq_priq.c index 5a413e3401b..578745023a9 100644 --- a/sys/net/altq/altq_priq.c +++ 
b/sys/net/altq/altq_priq.c @@ -95,12 +95,11 @@ priq_pfattach(struct pf_altq *a) } int -priq_add_altq(struct pf_altq *a) +priq_add_altq(struct ifnet * ifp, struct pf_altq *a) { struct priq_if *pif; - struct ifnet *ifp; - if ((ifp = ifunit(a->ifname)) == NULL) + if (ifp == NULL) return (EINVAL); if (!ALTQ_IS_READY(&ifp->if_snd)) return (ENODEV); diff --git a/sys/net/altq/altq_subr.c b/sys/net/altq/altq_subr.c index 4e840a5b2e3..e49a925f5e8 100644 --- a/sys/net/altq/altq_subr.c +++ b/sys/net/altq/altq_subr.c @@ -520,7 +520,7 @@ altq_pfdetach(struct pf_altq *a) * malloc with WAITOK, also it is not yet clear which lock to use. */ int -altq_add(struct pf_altq *a) +altq_add(struct ifnet *ifp, struct pf_altq *a) { int error = 0; @@ -535,27 +535,27 @@ altq_add(struct pf_altq *a) switch (a->scheduler) { #ifdef ALTQ_CBQ case ALTQT_CBQ: - error = cbq_add_altq(a); + error = cbq_add_altq(ifp, a); break; #endif #ifdef ALTQ_PRIQ case ALTQT_PRIQ: - error = priq_add_altq(a); + error = priq_add_altq(ifp, a); break; #endif #ifdef ALTQ_HFSC case ALTQT_HFSC: - error = hfsc_add_altq(a); + error = hfsc_add_altq(ifp, a); break; #endif #ifdef ALTQ_FAIRQ case ALTQT_FAIRQ: - error = fairq_add_altq(a); + error = fairq_add_altq(ifp, a); break; #endif #ifdef ALTQ_CODEL case ALTQT_CODEL: - error = codel_add_altq(a); + error = codel_add_altq(ifp, a); break; #endif default: diff --git a/sys/net/altq/altq_var.h b/sys/net/altq/altq_var.h index 47326a03f3d..f711e09334c 100644 --- a/sys/net/altq/altq_var.h +++ b/sys/net/altq/altq_var.h @@ -199,40 +199,40 @@ int tbr_set(struct ifaltq *, struct tb_profile *); int altq_pfattach(struct pf_altq *); int altq_pfdetach(struct pf_altq *); -int altq_add(struct pf_altq *); +int altq_add(struct ifnet *, struct pf_altq *); int altq_remove(struct pf_altq *); int altq_add_queue(struct pf_altq *); int altq_remove_queue(struct pf_altq *); int altq_getqstats(struct pf_altq *, void *, int *, int); int cbq_pfattach(struct pf_altq *); -int cbq_add_altq(struct pf_altq *); 
+int cbq_add_altq(struct ifnet *, struct pf_altq *); int cbq_remove_altq(struct pf_altq *); int cbq_add_queue(struct pf_altq *); int cbq_remove_queue(struct pf_altq *); int cbq_getqstats(struct pf_altq *, void *, int *, int); int codel_pfattach(struct pf_altq *); -int codel_add_altq(struct pf_altq *); +int codel_add_altq(struct ifnet *, struct pf_altq *); int codel_remove_altq(struct pf_altq *); int codel_getqstats(struct pf_altq *, void *, int *, int); int priq_pfattach(struct pf_altq *); -int priq_add_altq(struct pf_altq *); +int priq_add_altq(struct ifnet *, struct pf_altq *); int priq_remove_altq(struct pf_altq *); int priq_add_queue(struct pf_altq *); int priq_remove_queue(struct pf_altq *); int priq_getqstats(struct pf_altq *, void *, int *, int); int hfsc_pfattach(struct pf_altq *); -int hfsc_add_altq(struct pf_altq *); +int hfsc_add_altq(struct ifnet *, struct pf_altq *); int hfsc_remove_altq(struct pf_altq *); int hfsc_add_queue(struct pf_altq *); int hfsc_remove_queue(struct pf_altq *); int hfsc_getqstats(struct pf_altq *, void *, int *, int); int fairq_pfattach(struct pf_altq *); -int fairq_add_altq(struct pf_altq *); +int fairq_add_altq(struct ifnet *, struct pf_altq *); int fairq_remove_altq(struct pf_altq *); int fairq_add_queue(struct pf_altq *); int fairq_remove_queue(struct pf_altq *); diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h index 2924c06dbc4..ac7ae2b3748 100644 --- a/sys/net/pfvar.h +++ b/sys/net/pfvar.h @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -95,6 +96,9 @@ struct pf_addr_wrap { #ifdef _KERNEL +SYSCTL_DECL(_net_pf); +MALLOC_DECLARE(M_PFHASH); + struct pfi_dynaddr { TAILQ_ENTRY(pfi_dynaddr) entry; struct pf_addr pfid_addr4; @@ -1601,7 +1605,7 @@ VNET_DECLARE(uint64_t, pf_stateid[MAXCPU]); #define V_pf_stateid VNET(pf_stateid) TAILQ_HEAD(pf_altqqueue, pf_altq); -VNET_DECLARE(struct pf_altqqueue, pf_altqs[2]); +VNET_DECLARE(struct pf_altqqueue, pf_altqs[4]); #define V_pf_altqs VNET(pf_altqs) 
VNET_DECLARE(struct pf_palist, pf_pabuf); #define V_pf_pabuf VNET(pf_pabuf) @@ -1616,8 +1620,12 @@ VNET_DECLARE(u_int32_t, ticket_pabuf); #define V_ticket_pabuf VNET(ticket_pabuf) VNET_DECLARE(struct pf_altqqueue *, pf_altqs_active); #define V_pf_altqs_active VNET(pf_altqs_active) +VNET_DECLARE(struct pf_altqqueue *, pf_altq_ifs_active); +#define V_pf_altq_ifs_active VNET(pf_altq_ifs_active) VNET_DECLARE(struct pf_altqqueue *, pf_altqs_inactive); #define V_pf_altqs_inactive VNET(pf_altqs_inactive) +VNET_DECLARE(struct pf_altqqueue *, pf_altq_ifs_inactive); +#define V_pf_altq_ifs_inactive VNET(pf_altq_ifs_inactive) VNET_DECLARE(struct pf_rulequeue, pf_unlinked_rules); #define V_pf_unlinked_rules VNET(pf_unlinked_rules) diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c index 2c7db9a7e5c..46ffd4ee202 100644 --- a/sys/netpfil/pf/pf.c +++ b/sys/netpfil/pf/pf.c @@ -113,10 +113,12 @@ __FBSDID("$FreeBSD$"); */ /* state tables */ -VNET_DEFINE(struct pf_altqqueue, pf_altqs[2]); +VNET_DEFINE(struct pf_altqqueue, pf_altqs[4]); VNET_DEFINE(struct pf_palist, pf_pabuf); VNET_DEFINE(struct pf_altqqueue *, pf_altqs_active); +VNET_DEFINE(struct pf_altqqueue *, pf_altq_ifs_active); VNET_DEFINE(struct pf_altqqueue *, pf_altqs_inactive); +VNET_DEFINE(struct pf_altqqueue *, pf_altq_ifs_inactive); VNET_DEFINE(struct pf_kstatus, pf_status); VNET_DEFINE(u_int32_t, ticket_altqs_active); @@ -358,7 +360,7 @@ VNET_DEFINE(struct pf_limit, pf_limits[PF_LIMIT_MAX]); counter_u64_add(s->rule.ptr->states_cur, -1); \ } while (0) -static MALLOC_DEFINE(M_PFHASH, "pf_hash", "pf(4) hash header structures"); +MALLOC_DEFINE(M_PFHASH, "pf_hash", "pf(4) hash header structures"); VNET_DEFINE(struct pf_keyhash *, pf_keyhash); VNET_DEFINE(struct pf_idhash *, pf_idhash); VNET_DEFINE(struct pf_srchash *, pf_srchash); @@ -860,9 +862,13 @@ pf_initialize() /* ALTQ */ TAILQ_INIT(&V_pf_altqs[0]); TAILQ_INIT(&V_pf_altqs[1]); + TAILQ_INIT(&V_pf_altqs[2]); + TAILQ_INIT(&V_pf_altqs[3]); TAILQ_INIT(&V_pf_pabuf); 
V_pf_altqs_active = &V_pf_altqs[0]; - V_pf_altqs_inactive = &V_pf_altqs[1]; + V_pf_altq_ifs_active = &V_pf_altqs[1]; + V_pf_altqs_inactive = &V_pf_altqs[2]; + V_pf_altq_ifs_inactive = &V_pf_altqs[3]; /* Send & overload+flush queues. */ STAILQ_INIT(&V_pf_sendqueue); diff --git a/sys/netpfil/pf/pf_ioctl.c b/sys/netpfil/pf/pf_ioctl.c index fd946d158ba..cb22aa678ba 100644 --- a/sys/netpfil/pf/pf_ioctl.c +++ b/sys/netpfil/pf/pf_ioctl.c @@ -46,11 +46,14 @@ __FBSDID("$FreeBSD$"); #include "opt_pf.h" #include +#include +#include #include #include #include #include #include +#include #include #include #include @@ -129,18 +132,40 @@ VNET_DEFINE_STATIC(int, pf_altq_running); #define TAGID_MAX 50000 struct pf_tagname { - TAILQ_ENTRY(pf_tagname) entries; + TAILQ_ENTRY(pf_tagname) namehash_entries; + TAILQ_ENTRY(pf_tagname) taghash_entries; char name[PF_TAG_NAME_SIZE]; uint16_t tag; int ref; }; -TAILQ_HEAD(pf_tags, pf_tagname); -#define V_pf_tags VNET(pf_tags) -VNET_DEFINE(struct pf_tags, pf_tags); -#define V_pf_qids VNET(pf_qids) -VNET_DEFINE(struct pf_tags, pf_qids); -static MALLOC_DEFINE(M_PFTAG, "pf_tag", "pf(4) tag names"); +struct pf_tagset { + TAILQ_HEAD(, pf_tagname) *namehash; + TAILQ_HEAD(, pf_tagname) *taghash; + unsigned int mask; + uint32_t seed; + BITSET_DEFINE(, TAGID_MAX) avail; +}; + +VNET_DEFINE(struct pf_tagset, pf_tags); +#define V_pf_tags VNET(pf_tags) +static unsigned int pf_rule_tag_hashsize; +#define PF_RULE_TAG_HASH_SIZE_DEFAULT 128 +SYSCTL_UINT(_net_pf, OID_AUTO, rule_tag_hashsize, CTLFLAG_RDTUN, + &pf_rule_tag_hashsize, PF_RULE_TAG_HASH_SIZE_DEFAULT, + "Size of pf(4) rule tag hashtable"); + +#ifdef ALTQ +VNET_DEFINE(struct pf_tagset, pf_qids); +#define V_pf_qids VNET(pf_qids) +static unsigned int pf_queue_tag_hashsize; +#define PF_QUEUE_TAG_HASH_SIZE_DEFAULT 128 +SYSCTL_UINT(_net_pf, OID_AUTO, queue_tag_hashsize, CTLFLAG_RDTUN, + &pf_queue_tag_hashsize, PF_QUEUE_TAG_HASH_SIZE_DEFAULT, + "Size of pf(4) queue tag hashtable"); +#endif 
+VNET_DEFINE(uma_zone_t, pf_tag_z); +#define V_pf_tag_z VNET(pf_tag_z) static MALLOC_DEFINE(M_PFALTQ, "pf_altq", "pf(4) altq configuration db"); static MALLOC_DEFINE(M_PFRULE, "pf_rule", "pf(4) rules"); @@ -148,9 +173,14 @@ static MALLOC_DEFINE(M_PFRULE, "pf_rule", "pf(4) rules"); #error PF_QNAME_SIZE must be equal to PF_TAG_NAME_SIZE #endif -static u_int16_t tagname2tag(struct pf_tags *, char *); +static void pf_init_tagset(struct pf_tagset *, unsigned int *, + unsigned int); +static void pf_cleanup_tagset(struct pf_tagset *); +static uint16_t tagname2hashindex(const struct pf_tagset *, const char *); +static uint16_t tag2hashindex(const struct pf_tagset *, uint16_t); +static u_int16_t tagname2tag(struct pf_tagset *, char *); static u_int16_t pf_tagname2tag(char *); -static void tag_unref(struct pf_tags *, u_int16_t); +static void tag_unref(struct pf_tagset *, u_int16_t); #define DPFPRINTF(n, x) if (V_pf_status.debug >= (n)) printf x @@ -436,68 +466,141 @@ pf_free_rule(struct pf_rule *rule) free(rule, M_PFRULE); } -static u_int16_t -tagname2tag(struct pf_tags *head, char *tagname) +static void +pf_init_tagset(struct pf_tagset *ts, unsigned int *tunable_size, + unsigned int default_size) { - struct pf_tagname *tag, *p = NULL; - u_int16_t new_tagid = 1; + unsigned int i; + unsigned int hashsize; + + if (*tunable_size == 0 || !powerof2(*tunable_size)) + *tunable_size = default_size; + + hashsize = *tunable_size; + ts->namehash = mallocarray(hashsize, sizeof(*ts->namehash), M_PFHASH, + M_WAITOK); + ts->taghash = mallocarray(hashsize, sizeof(*ts->taghash), M_PFHASH, + M_WAITOK); + ts->mask = hashsize - 1; + ts->seed = arc4random(); + for (i = 0; i < hashsize; i++) { + TAILQ_INIT(&ts->namehash[i]); + TAILQ_INIT(&ts->taghash[i]); + } + BIT_FILL(TAGID_MAX, &ts->avail); +} + +static void +pf_cleanup_tagset(struct pf_tagset *ts) +{ + unsigned int i; + unsigned int hashsize; + struct pf_tagname *t, *tmp; + + /* + * Only need to clean up one of the hashes as each tag is 
hashed + * into each table. + */ + hashsize = ts->mask + 1; + for (i = 0; i < hashsize; i++) + TAILQ_FOREACH_SAFE(t, &ts->namehash[i], namehash_entries, tmp) + uma_zfree(V_pf_tag_z, t); + + free(ts->namehash, M_PFHASH); + free(ts->taghash, M_PFHASH); +} + +static uint16_t +tagname2hashindex(const struct pf_tagset *ts, const char *tagname) +{ + + return (murmur3_32_hash(tagname, strlen(tagname), ts->seed) & ts->mask); +} + +static uint16_t +tag2hashindex(const struct pf_tagset *ts, uint16_t tag) +{ + + return (tag & ts->mask); +} + +static u_int16_t +tagname2tag(struct pf_tagset *ts, char *tagname) +{ + struct pf_tagname *tag; + u_int32_t index; + u_int16_t new_tagid; PF_RULES_WASSERT(); - TAILQ_FOREACH(tag, head, entries) + index = tagname2hashindex(ts, tagname); + TAILQ_FOREACH(tag, &ts->namehash[index], namehash_entries) if (strcmp(tagname, tag->name) == 0) { tag->ref++; return (tag->tag); } /* + * new entry + * * to avoid fragmentation, we do a linear search from the beginning - * and take the first free slot we find. if there is none or the list - * is empty, append a new entry at the end. + * and take the first free slot we find. */ - - /* new entry */ - if (!TAILQ_EMPTY(head)) - for (p = TAILQ_FIRST(head); p != NULL && - p->tag == new_tagid; p = TAILQ_NEXT(p, entries)) - new_tagid = p->tag + 1; - - if (new_tagid > TAGID_MAX) + new_tagid = BIT_FFS(TAGID_MAX, &ts->avail); + /* + * Tags are 1-based, with valid tags in the range [1..TAGID_MAX]. + * BIT_FFS() returns a 1-based bit number, with 0 indicating no bits + * set. It may also return a bit number greater than TAGID_MAX due + * to rounding of the number of bits in the vector up to a multiple + * of the vector word size at declaration/allocation time. + */ + if ((new_tagid == 0) || (new_tagid > TAGID_MAX)) return (0); + /* Mark the tag as in use. 
Bits are 0-based for BIT_CLR() */ + BIT_CLR(TAGID_MAX, new_tagid - 1, &ts->avail); + /* allocate and fill new struct pf_tagname */ - tag = malloc(sizeof(*tag), M_PFTAG, M_NOWAIT|M_ZERO); + tag = uma_zalloc(V_pf_tag_z, M_NOWAIT); if (tag == NULL) return (0); strlcpy(tag->name, tagname, sizeof(tag->name)); tag->tag = new_tagid; - tag->ref++; + tag->ref = 1; - if (p != NULL) /* insert new entry before p */ - TAILQ_INSERT_BEFORE(p, tag, entries); - else /* either list empty or no free slot in between */ - TAILQ_INSERT_TAIL(head, tag, entries); + /* Insert into namehash */ + TAILQ_INSERT_TAIL(&ts->namehash[index], tag, namehash_entries); + /* Insert into taghash */ + index = tag2hashindex(ts, new_tagid); + TAILQ_INSERT_TAIL(&ts->taghash[index], tag, taghash_entries); + return (tag->tag); } static void -tag_unref(struct pf_tags *head, u_int16_t tag) +tag_unref(struct pf_tagset *ts, u_int16_t tag) { - struct pf_tagname *p, *next; - + struct pf_tagname *t; + uint16_t index; + PF_RULES_WASSERT(); - for (p = TAILQ_FIRST(head); p != NULL; p = next) { - next = TAILQ_NEXT(p, entries); - if (tag == p->tag) { - if (--p->ref == 0) { - TAILQ_REMOVE(head, p, entries); - free(p, M_PFTAG); + index = tag2hashindex(ts, tag); + TAILQ_FOREACH(t, &ts->taghash[index], taghash_entries) + if (tag == t->tag) { + if (--t->ref == 0) { + TAILQ_REMOVE(&ts->taghash[index], t, + taghash_entries); + index = tagname2hashindex(ts, t->name); + TAILQ_REMOVE(&ts->namehash[index], t, + namehash_entries); + /* Bits are 0-based for BIT_SET() */ + BIT_SET(TAGID_MAX, tag - 1, &ts->avail); + uma_zfree(V_pf_tag_z, t); } break; } - } } static u_int16_t @@ -522,22 +625,25 @@ pf_qid_unref(u_int32_t qid) static int pf_begin_altq(u_int32_t *ticket) { - struct pf_altq *altq; + struct pf_altq *altq, *tmp; int error = 0; PF_RULES_WASSERT(); - /* Purge the old altq list */ - while ((altq = TAILQ_FIRST(V_pf_altqs_inactive)) != NULL) { - TAILQ_REMOVE(V_pf_altqs_inactive, altq, entries); - if (altq->qname[0] == 0 && - 
(altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { + /* Purge the old altq lists */ + TAILQ_FOREACH_SAFE(altq, V_pf_altq_ifs_inactive, entries, tmp) { + if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { /* detach and destroy the discipline */ error = altq_remove(altq); - } else - pf_qid_unref(altq->qid); + } free(altq, M_PFALTQ); } + TAILQ_INIT(V_pf_altq_ifs_inactive); + TAILQ_FOREACH_SAFE(altq, V_pf_altqs_inactive, entries, tmp) { + pf_qid_unref(altq->qid); + free(altq, M_PFALTQ); + } + TAILQ_INIT(V_pf_altqs_inactive); if (error) return (error); *ticket = ++V_ticket_altqs_inactive; @@ -548,24 +654,27 @@ pf_begin_altq(u_int32_t *ticket) static int pf_rollback_altq(u_int32_t ticket) { - struct pf_altq *altq; + struct pf_altq *altq, *tmp; int error = 0; PF_RULES_WASSERT(); if (!V_altqs_inactive_open || ticket != V_ticket_altqs_inactive) return (0); - /* Purge the old altq list */ - while ((altq = TAILQ_FIRST(V_pf_altqs_inactive)) != NULL) { - TAILQ_REMOVE(V_pf_altqs_inactive, altq, entries); - if (altq->qname[0] == 0 && - (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { + /* Purge the old altq lists */ + TAILQ_FOREACH_SAFE(altq, V_pf_altq_ifs_inactive, entries, tmp) { + if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { /* detach and destroy the discipline */ error = altq_remove(altq); - } else - pf_qid_unref(altq->qid); + } free(altq, M_PFALTQ); } + TAILQ_INIT(V_pf_altq_ifs_inactive); + TAILQ_FOREACH_SAFE(altq, V_pf_altqs_inactive, entries, tmp) { + pf_qid_unref(altq->qid); + free(altq, M_PFALTQ); + } + TAILQ_INIT(V_pf_altqs_inactive); V_altqs_inactive_open = 0; return (error); } @@ -573,8 +682,8 @@ pf_rollback_altq(u_int32_t ticket) static int pf_commit_altq(u_int32_t ticket) { - struct pf_altqqueue *old_altqs; - struct pf_altq *altq; + struct pf_altqqueue *old_altqs, *old_altq_ifs; + struct pf_altq *altq, *tmp; int err, error = 0; PF_RULES_WASSERT(); @@ -584,14 +693,16 @@ pf_commit_altq(u_int32_t ticket) /* swap altqs, keep the old. 
*/ old_altqs = V_pf_altqs_active; + old_altq_ifs = V_pf_altq_ifs_active; V_pf_altqs_active = V_pf_altqs_inactive; + V_pf_altq_ifs_active = V_pf_altq_ifs_inactive; V_pf_altqs_inactive = old_altqs; + V_pf_altq_ifs_inactive = old_altq_ifs; V_ticket_altqs_active = V_ticket_altqs_inactive; /* Attach new disciplines */ - TAILQ_FOREACH(altq, V_pf_altqs_active, entries) { - if (altq->qname[0] == 0 && - (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { + TAILQ_FOREACH(altq, V_pf_altq_ifs_active, entries) { + if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { /* attach the discipline */ error = altq_pfattach(altq); if (error == 0 && V_pf_altq_running) @@ -601,11 +712,9 @@ pf_commit_altq(u_int32_t ticket) } } - /* Purge the old altq list */ - while ((altq = TAILQ_FIRST(V_pf_altqs_inactive)) != NULL) { - TAILQ_REMOVE(V_pf_altqs_inactive, altq, entries); - if (altq->qname[0] == 0 && - (altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { + /* Purge the old altq lists */ + TAILQ_FOREACH_SAFE(altq, V_pf_altq_ifs_inactive, entries, tmp) { + if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { /* detach and destroy the discipline */ if (V_pf_altq_running) error = pf_disable_altq(altq); @@ -615,10 +724,15 @@ pf_commit_altq(u_int32_t ticket) err = altq_remove(altq); if (err != 0 && error == 0) error = err; - } else - pf_qid_unref(altq->qid); + } free(altq, M_PFALTQ); } + TAILQ_INIT(V_pf_altq_ifs_inactive); + TAILQ_FOREACH_SAFE(altq, V_pf_altqs_inactive, entries, tmp) { + pf_qid_unref(altq->qid); + free(altq, M_PFALTQ); + } + TAILQ_INIT(V_pf_altqs_inactive); V_altqs_inactive_open = 0; return (error); @@ -675,10 +789,34 @@ pf_disable_altq(struct pf_altq *altq) return (error); } +static int +pf_altq_ifnet_event_add(struct ifnet *ifp, int remove, u_int32_t ticket, + struct pf_altq *altq) +{ + struct ifnet *ifp1; + int error = 0; + + /* Deactivate the interface in question */ + altq->local_flags &= ~PFALTQ_FLAG_IF_REMOVED; + if ((ifp1 = ifunit(altq->ifname)) == NULL || + 
(remove && ifp1 == ifp)) { + altq->local_flags |= PFALTQ_FLAG_IF_REMOVED; + } else { + error = altq_add(ifp1, altq); + + if (ticket != V_ticket_altqs_inactive) + error = EBUSY; + + if (error) + free(altq, M_PFALTQ); + } + + return (error); +} + void pf_altq_ifnet_event(struct ifnet *ifp, int remove) { - struct ifnet *ifp1; struct pf_altq *a1, *a2, *a3; u_int32_t ticket; int error = 0; @@ -700,6 +838,22 @@ pf_altq_ifnet_event(struct ifnet *ifp, int remove) return; /* Copy the current active set */ + TAILQ_FOREACH(a1, V_pf_altq_ifs_active, entries) { + a2 = malloc(sizeof(*a2), M_PFALTQ, M_NOWAIT); + if (a2 == NULL) { + error = ENOMEM; + break; + } + bcopy(a1, a2, sizeof(struct pf_altq)); + + error = pf_altq_ifnet_event_add(ifp, remove, ticket, a2); + if (error) + break; + + TAILQ_INSERT_TAIL(V_pf_altq_ifs_inactive, a2, entries); + } + if (error) + goto out; TAILQ_FOREACH(a1, V_pf_altqs_active, entries) { a2 = malloc(sizeof(*a2), M_PFALTQ, M_NOWAIT); if (a2 == NULL) { @@ -708,41 +862,27 @@ pf_altq_ifnet_event(struct ifnet *ifp, int remove) } bcopy(a1, a2, sizeof(struct pf_altq)); - if (a2->qname[0] != 0) { - if ((a2->qid = pf_qname2qid(a2->qname)) == 0) { - error = EBUSY; - free(a2, M_PFALTQ); - break; - } - a2->altq_disc = NULL; - TAILQ_FOREACH(a3, V_pf_altqs_inactive, entries) { - if (strncmp(a3->ifname, a2->ifname, - IFNAMSIZ) == 0 && a3->qname[0] == 0) { - a2->altq_disc = a3->altq_disc; - break; - } - } + if ((a2->qid = pf_qname2qid(a2->qname)) == 0) { + error = EBUSY; + free(a2, M_PFALTQ); + break; } - /* Deactivate the interface in question */ - a2->local_flags &= ~PFALTQ_FLAG_IF_REMOVED; - if ((ifp1 = ifunit(a2->ifname)) == NULL || - (remove && ifp1 == ifp)) { - a2->local_flags |= PFALTQ_FLAG_IF_REMOVED; - } else { - error = altq_add(a2); - - if (ticket != V_ticket_altqs_inactive) - error = EBUSY; - - if (error) { - free(a2, M_PFALTQ); + a2->altq_disc = NULL; + TAILQ_FOREACH(a3, V_pf_altq_ifs_inactive, entries) { + if (strncmp(a3->ifname, a2->ifname, + 
IFNAMSIZ) == 0) { + a2->altq_disc = a3->altq_disc; break; } } + error = pf_altq_ifnet_event_add(ifp, remove, ticket, a2); + if (error) + break; TAILQ_INSERT_TAIL(V_pf_altqs_inactive, a2, entries); } +out: if (error != 0) pf_rollback_altq(ticket); else @@ -1222,6 +1362,28 @@ pf_import_kaltq(struct pfioc_altq_v1 *pa, struct pf_altq *q, size_t ioc_size) } #endif /* ALTQ */ +static struct pf_altq * +pf_altq_get_nth_active(u_int32_t n) +{ + struct pf_altq *altq; + u_int32_t nr; + + nr = 0; + TAILQ_FOREACH(altq, V_pf_altq_ifs_active, entries) { + if (nr == n) + return (altq); + nr++; + } + + TAILQ_FOREACH(altq, V_pf_altqs_active, entries) { + if (nr == n) + return (altq); + nr++; + } + + return (NULL); +} + static int pfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td) { @@ -2269,9 +2431,8 @@ DIOCGETSTATES_full: PF_RULES_WLOCK(); /* enable all altq interfaces on active list */ - TAILQ_FOREACH(altq, V_pf_altqs_active, entries) { - if (altq->qname[0] == 0 && (altq->local_flags & - PFALTQ_FLAG_IF_REMOVED) == 0) { + TAILQ_FOREACH(altq, V_pf_altq_ifs_active, entries) { + if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { error = pf_enable_altq(altq); if (error != 0) break; @@ -2289,9 +2450,8 @@ DIOCGETSTATES_full: PF_RULES_WLOCK(); /* disable all altq interfaces on active list */ - TAILQ_FOREACH(altq, V_pf_altqs_active, entries) { - if (altq->qname[0] == 0 && (altq->local_flags & - PFALTQ_FLAG_IF_REMOVED) == 0) { + TAILQ_FOREACH(altq, V_pf_altq_ifs_active, entries) { + if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { error = pf_disable_altq(altq); if (error != 0) break; @@ -2336,9 +2496,9 @@ DIOCGETSTATES_full: break; } altq->altq_disc = NULL; - TAILQ_FOREACH(a, V_pf_altqs_inactive, entries) { + TAILQ_FOREACH(a, V_pf_altq_ifs_inactive, entries) { if (strncmp(a->ifname, altq->ifname, - IFNAMSIZ) == 0 && a->qname[0] == 0) { + IFNAMSIZ) == 0) { altq->altq_disc = a->altq_disc; break; } @@ -2348,7 +2508,7 @@ DIOCGETSTATES_full: if ((ifp 
= ifunit(altq->ifname)) == NULL) altq->local_flags |= PFALTQ_FLAG_IF_REMOVED; else - error = altq_add(altq); + error = altq_add(ifp, altq); if (error) { PF_RULES_WUNLOCK(); @@ -2356,7 +2516,10 @@ DIOCGETSTATES_full: break; } - TAILQ_INSERT_TAIL(V_pf_altqs_inactive, altq, entries); + if (altq->qname[0] != 0) + TAILQ_INSERT_TAIL(V_pf_altqs_inactive, altq, entries); + else + TAILQ_INSERT_TAIL(V_pf_altq_ifs_inactive, altq, entries); /* version error check done on import above */ pf_export_kaltq(altq, pa, IOCPARM_LEN(cmd)); PF_RULES_WUNLOCK(); @@ -2370,6 +2533,8 @@ DIOCGETSTATES_full: PF_RULES_RLOCK(); pa->nr = 0; + TAILQ_FOREACH(altq, V_pf_altq_ifs_active, entries) + pa->nr++; TAILQ_FOREACH(altq, V_pf_altqs_active, entries) pa->nr++; pa->ticket = V_ticket_altqs_active; @@ -2381,7 +2546,6 @@ DIOCGETSTATES_full: case DIOCGETALTQV1: { struct pfioc_altq_v1 *pa = (struct pfioc_altq_v1 *)addr; struct pf_altq *altq; - u_int32_t nr; PF_RULES_RLOCK(); if (pa->ticket != V_ticket_altqs_active) { @@ -2389,12 +2553,7 @@ DIOCGETSTATES_full: error = EBUSY; break; } - nr = 0; - altq = TAILQ_FIRST(V_pf_altqs_active); - while ((altq != NULL) && (nr < pa->nr)) { - altq = TAILQ_NEXT(altq, entries); - nr++; - } + altq = pf_altq_get_nth_active(pa->nr); if (altq == NULL) { PF_RULES_RUNLOCK(); error = EBUSY; @@ -2415,7 +2574,6 @@ DIOCGETSTATES_full: case DIOCGETQSTATSV1: { struct pfioc_qstats_v1 *pq = (struct pfioc_qstats_v1 *)addr; struct pf_altq *altq; - u_int32_t nr; int nbytes; u_int32_t version; @@ -2426,12 +2584,7 @@ DIOCGETSTATES_full: break; } nbytes = pq->nbytes; - nr = 0; - altq = TAILQ_FIRST(V_pf_altqs_active); - while ((altq != NULL) && (nr < pq->nr)) { - altq = TAILQ_NEXT(altq, entries); - nr++; - } + altq = pf_altq_get_nth_active(pq->nr); if (altq == NULL) { PF_RULES_RUNLOCK(); error = EBUSY; @@ -4173,8 +4326,15 @@ dehook_pf(void) static void pf_load_vnet(void) { - TAILQ_INIT(&V_pf_tags); - TAILQ_INIT(&V_pf_qids); + V_pf_tag_z = uma_zcreate("pf tags", sizeof(struct pf_tagname), 
+ NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); + + pf_init_tagset(&V_pf_tags, &pf_rule_tag_hashsize, + PF_RULE_TAG_HASH_SIZE_DEFAULT); +#ifdef ALTQ + pf_init_tagset(&V_pf_qids, &pf_queue_tag_hashsize, + PF_QUEUE_TAG_HASH_SIZE_DEFAULT); +#endif pfattach_vnet(); V_pf_vnet_active = 1; @@ -4241,6 +4401,12 @@ pf_unload_vnet(void) if (IS_DEFAULT_VNET(curvnet)) pf_mtag_cleanup(); + pf_cleanup_tagset(&V_pf_tags); +#ifdef ALTQ + pf_cleanup_tagset(&V_pf_qids); +#endif + uma_zdestroy(V_pf_tag_z); + /* Free counters last as we updated them during shutdown. */ counter_u64_free(V_pf_default_rule.states_cur); counter_u64_free(V_pf_default_rule.states_tot); From d178fee632ca8cc891cf33737bb8ca94a2a9555e Mon Sep 17 00:00:00 2001 From: Patrick Kelsey Date: Mon, 11 Feb 2019 05:39:38 +0000 Subject: [PATCH 28/93] Place pf_altq_get_nth_active() under the ALTQ ifdef MFC after: 1 week --- sys/netpfil/pf/pf_ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/netpfil/pf/pf_ioctl.c b/sys/netpfil/pf/pf_ioctl.c index cb22aa678ba..c6f9f8451be 100644 --- a/sys/netpfil/pf/pf_ioctl.c +++ b/sys/netpfil/pf/pf_ioctl.c @@ -1360,7 +1360,6 @@ pf_import_kaltq(struct pfioc_altq_v1 *pa, struct pf_altq *q, size_t ioc_size) return (0); } -#endif /* ALTQ */ static struct pf_altq * pf_altq_get_nth_active(u_int32_t n) @@ -1383,6 +1382,7 @@ pf_altq_get_nth_active(u_int32_t n) return (NULL); } +#endif /* ALTQ */ static int pfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td) From 3af08701cd5e734620cac6ea1e051c316869fdf9 Mon Sep 17 00:00:00 2001 From: Oleksandr Tymoshenko Date: Mon, 11 Feb 2019 07:42:32 +0000 Subject: [PATCH 29/93] Fix off-by-one error in BERI virtio driver The hardcoded ident is exactly 20 bytes long but sprintf adds terminating zero, so there is one byte written out of array bounds. As a fix, use strncpy: it appends \0 only if space allows, and its behavior matches the virtio spec: When VIRTIO_BLK_T_GET_ID is issued, the device identifier, up to 20
bytes, is written to the buffer. The identifier should be interpreted as an ascii string. It is terminated with \0, unless it is exactly 20 bytes long. PR: 202298 Reviewed by: br MFC after: 1 week Differential Revision: https://reviews.freebsd.org/D18852 --- sys/dev/beri/virtio/virtio_block.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sys/dev/beri/virtio/virtio_block.c b/sys/dev/beri/virtio/virtio_block.c index 50bb7f3a53b..7c8a03966a3 100644 --- a/sys/dev/beri/virtio/virtio_block.c +++ b/sys/dev/beri/virtio/virtio_block.c @@ -187,7 +187,7 @@ vtblk_proc(struct beri_vtblk_softc *sc, struct vqueue_info *vq) break; case VIRTIO_BLK_T_GET_ID: /* Assume a single buffer */ - strlcpy(iov[1].iov_base, sc->ident, + strncpy(iov[1].iov_base, sc->ident, MIN(iov[1].iov_len, sizeof(sc->ident))); err = 0; break; @@ -401,7 +401,7 @@ backend_info(struct beri_vtblk_softc *sc) s+=1; } - sprintf(sc->ident, "Virtio block backend"); + strncpy(sc->ident, "Virtio block backend", sizeof(sc->ident)); return (0); } From 66bddb4c701f48bcb12df01e872eb6bcba08443a Mon Sep 17 00:00:00 2001 From: Ganbold Tsagaankhuu Date: Mon, 11 Feb 2019 14:31:19 +0000 Subject: [PATCH 30/93] Add sensors support for AXP803/AXP813. Sensor values such as battery charging, charge state, voltage, charging current, discharging current, battery capacity etc. can be obtained via sysctl. 
Reviewed by: manu Differential Revision: https://reviews.freebsd.org/D19145 --- sys/arm/allwinner/axp81x.c | 268 ++++++++++++++++++++++++++++++++++++- 1 file changed, 267 insertions(+), 1 deletion(-) diff --git a/sys/arm/allwinner/axp81x.c b/sys/arm/allwinner/axp81x.c index 7bfe1a5398e..2913fe91878 100644 --- a/sys/arm/allwinner/axp81x.c +++ b/sys/arm/allwinner/axp81x.c @@ -194,6 +194,11 @@ MALLOC_DEFINE(M_AXP8XX_REG, "AXP8xx regulator", "AXP8xx power regulator"); #define AXP_BAT_CAP_WARN_LV1 0xf0 /* Bits 4, 5, 6, 7 */ #define AXP_BAT_CAP_WARN_LV2 0xf /* Bits 0, 1, 2, 3 */ +/* Sensor conversion macros */ +#define AXP_SENSOR_BAT_H(hi) ((hi) << 4) +#define AXP_SENSOR_BAT_L(lo) ((lo) & 0xf) +#define AXP_SENSOR_COULOMB(hi, lo) (((hi & ~(1 << 7)) << 8) | (lo)) + static const struct { const char *name; uint8_t ctrl_reg; @@ -538,6 +543,123 @@ static struct axp8xx_regdef axp8xx_common_regdefs[] = { }, }; +enum axp8xx_sensor { + AXP_SENSOR_ACIN_PRESENT, + AXP_SENSOR_VBUS_PRESENT, + AXP_SENSOR_BATT_PRESENT, + AXP_SENSOR_BATT_CHARGING, + AXP_SENSOR_BATT_CHARGE_STATE, + AXP_SENSOR_BATT_VOLTAGE, + AXP_SENSOR_BATT_CHARGE_CURRENT, + AXP_SENSOR_BATT_DISCHARGE_CURRENT, + AXP_SENSOR_BATT_CAPACITY_PERCENT, + AXP_SENSOR_BATT_MAXIMUM_CAPACITY, + AXP_SENSOR_BATT_CURRENT_CAPACITY, +}; + +enum battery_capacity_state { + BATT_CAPACITY_NORMAL = 1, /* normal cap in battery */ + BATT_CAPACITY_WARNING, /* warning cap in battery */ + BATT_CAPACITY_CRITICAL, /* critical cap in battery */ + BATT_CAPACITY_HIGH, /* high cap in battery */ + BATT_CAPACITY_MAX, /* maximum cap in battery */ + BATT_CAPACITY_LOW /* low cap in battery */ +}; + +struct axp8xx_sensors { + int id; + const char *name; + const char *desc; + const char *format; +}; + +static const struct axp8xx_sensors axp8xx_common_sensors[] = { + { + .id = AXP_SENSOR_ACIN_PRESENT, + .name = "acin", + .format = "I", + .desc = "ACIN Present", + }, + { + .id = AXP_SENSOR_VBUS_PRESENT, + .name = "vbus", + .format = "I", + .desc = "VBUS Present", 
+ }, + { + .id = AXP_SENSOR_BATT_PRESENT, + .name = "bat", + .format = "I", + .desc = "Battery Present", + }, + { + .id = AXP_SENSOR_BATT_CHARGING, + .name = "batcharging", + .format = "I", + .desc = "Battery Charging", + }, + { + .id = AXP_SENSOR_BATT_CHARGE_STATE, + .name = "batchargestate", + .format = "I", + .desc = "Battery Charge State", + }, + { + .id = AXP_SENSOR_BATT_VOLTAGE, + .name = "batvolt", + .format = "I", + .desc = "Battery Voltage", + }, + { + .id = AXP_SENSOR_BATT_CHARGE_CURRENT, + .name = "batchargecurrent", + .format = "I", + .desc = "Battery Charging Current", + }, + { + .id = AXP_SENSOR_BATT_DISCHARGE_CURRENT, + .name = "batdischargecurrent", + .format = "I", + .desc = "Battery Discharging Current", + }, + { + .id = AXP_SENSOR_BATT_CAPACITY_PERCENT, + .name = "batcapacitypercent", + .format = "I", + .desc = "Battery Capacity Percentage", + }, + { + .id = AXP_SENSOR_BATT_MAXIMUM_CAPACITY, + .name = "batmaxcapacity", + .format = "I", + .desc = "Battery Maximum Capacity", + }, + { + .id = AXP_SENSOR_BATT_CURRENT_CAPACITY, + .name = "batcurrentcapacity", + .format = "I", + .desc = "Battery Current Capacity", + }, +}; + +struct axp8xx_config { + const char *name; + int batsense_step; /* uV */ + int charge_step; /* uA */ + int discharge_step; /* uA */ + int maxcap_step; /* uAh */ + int coulomb_step; /* uAh */ +}; + +static struct axp8xx_config axp803_config = { + .name = "AXP803", + .batsense_step = 1100, + .charge_step = 1000, + .discharge_step = 1000, + .maxcap_step = 1456, + .coulomb_step = 1456, +}; + struct axp8xx_softc; struct axp8xx_reg_sc { @@ -558,9 +680,20 @@ struct axp8xx_softc { int type; + /* Configs */ + const struct axp8xx_config *config; + + /* Sensors */ + const struct axp8xx_sensors *sensors; + int nsensors; + /* Regulators */ struct axp8xx_reg_sc **regs; int nregs; + + /* Warning, shutdown thresholds */ + int warn_thres; + int shut_thres; }; #define AXP_LOCK(sc) mtx_lock(&(sc)->mtx) @@ -756,6 +889,110 @@ axp8xx_shutdown(void 
*devp, int howto) axp8xx_write(dev, AXP_POWERBAT, AXP_POWERBAT_SHUTDOWN); } +static int +axp8xx_sysctl(SYSCTL_HANDLER_ARGS) +{ + struct axp8xx_softc *sc; + device_t dev = arg1; + enum axp8xx_sensor sensor = arg2; + const struct axp8xx_config *c; + uint8_t data; + int val, i, found, batt_val; + uint8_t lo, hi; + + sc = device_get_softc(dev); + c = sc->config; + + for (found = 0, i = 0; i < sc->nsensors; i++) { + if (sc->sensors[i].id == sensor) { + found = 1; + break; + } + } + + if (found == 0) + return (ENOENT); + + switch (sensor) { + case AXP_SENSOR_ACIN_PRESENT: + if (axp8xx_read(dev, AXP_POWERSRC, &data, 1) == 0) + val = !!(data & AXP_POWERSRC_ACIN); + break; + case AXP_SENSOR_VBUS_PRESENT: + if (axp8xx_read(dev, AXP_POWERSRC, &data, 1) == 0) + val = !!(data & AXP_POWERSRC_VBUS); + break; + case AXP_SENSOR_BATT_PRESENT: + if (axp8xx_read(dev, AXP_POWERMODE, &data, 1) == 0) { + if (data & AXP_POWERMODE_BAT_VALID) + val = !!(data & AXP_POWERMODE_BAT_PRESENT); + } + break; + case AXP_SENSOR_BATT_CHARGING: + if (axp8xx_read(dev, AXP_POWERMODE, &data, 1) == 0) + val = !!(data & AXP_POWERMODE_BAT_CHARGING); + break; + case AXP_SENSOR_BATT_CHARGE_STATE: + if (axp8xx_read(dev, AXP_BAT_CAP, &data, 1) == 0 && + (data & AXP_BAT_CAP_VALID) != 0) { + batt_val = (data & AXP_BAT_CAP_PERCENT); + if (batt_val <= sc->shut_thres) + val = BATT_CAPACITY_CRITICAL; + else if (batt_val <= sc->warn_thres) + val = BATT_CAPACITY_WARNING; + else + val = BATT_CAPACITY_NORMAL; + } + break; + case AXP_SENSOR_BATT_CAPACITY_PERCENT: + if (axp8xx_read(dev, AXP_BAT_CAP, &data, 1) == 0 && + (data & AXP_BAT_CAP_VALID) != 0) + val = (data & AXP_BAT_CAP_PERCENT); + break; + case AXP_SENSOR_BATT_VOLTAGE: + if (axp8xx_read(dev, AXP_BATSENSE_HI, &hi, 1) == 0 && + axp8xx_read(dev, AXP_BATSENSE_LO, &lo, 1) == 0) { + val = (AXP_SENSOR_BAT_H(hi) | AXP_SENSOR_BAT_L(lo)); + val *= c->batsense_step; + } + break; + case AXP_SENSOR_BATT_CHARGE_CURRENT: + if (axp8xx_read(dev, AXP_POWERSRC, &data, 1) == 0 && + 
(data & AXP_POWERSRC_CHARING) != 0 && + axp8xx_read(dev, AXP_BATCHG_HI, &hi, 1) == 0 && + axp8xx_read(dev, AXP_BATCHG_LO, &lo, 1) == 0) { + val = (AXP_SENSOR_BAT_H(hi) | AXP_SENSOR_BAT_L(lo)); + val *= c->charge_step; + } + break; + case AXP_SENSOR_BATT_DISCHARGE_CURRENT: + if (axp8xx_read(dev, AXP_POWERSRC, &data, 1) == 0 && + (data & AXP_POWERSRC_CHARING) == 0 && + axp8xx_read(dev, AXP_BATDISCHG_HI, &hi, 1) == 0 && + axp8xx_read(dev, AXP_BATDISCHG_LO, &lo, 1) == 0) { + val = (AXP_SENSOR_BAT_H(hi) | AXP_SENSOR_BAT_L(lo)); + val *= c->discharge_step; + } + break; + case AXP_SENSOR_BATT_MAXIMUM_CAPACITY: + if (axp8xx_read(dev, AXP_BAT_MAX_CAP_HI, &hi, 1) == 0 && + axp8xx_read(dev, AXP_BAT_MAX_CAP_LO, &lo, 1) == 0) { + val = AXP_SENSOR_COULOMB(hi, lo); + val *= c->maxcap_step; + } + break; + case AXP_SENSOR_BATT_CURRENT_CAPACITY: + if (axp8xx_read(dev, AXP_BAT_COULOMB_HI, &hi, 1) == 0 && + axp8xx_read(dev, AXP_BAT_COULOMB_LO, &lo, 1) == 0) { + val = AXP_SENSOR_COULOMB(hi, lo); + val *= c->coulomb_step; + } + break; + } + + return sysctl_handle_opaque(oidp, &val, sizeof(val), req); +} + static void axp8xx_intr(void *arg) { @@ -1157,7 +1394,7 @@ axp8xx_attach(device_t dev) { struct axp8xx_softc *sc; struct axp8xx_reg_sc *reg; - uint8_t chip_id; + uint8_t chip_id, val; phandle_t rnode, child; int error, i; @@ -1187,6 +1424,10 @@ axp8xx_attach(device_t dev) sc->nregs += nitems(axp813_regdefs); break; } + sc->config = &axp803_config; + sc->sensors = axp8xx_common_sensors; + sc->nsensors = nitems(axp8xx_common_sensors); + sc->regs = malloc(sizeof(struct axp8xx_reg_sc *) * sc->nregs, M_AXP8XX_REG, M_WAITOK | M_ZERO); @@ -1231,6 +1472,31 @@ axp8xx_attach(device_t dev) } } + /* Add sensors */ + for (i = 0; i < sc->nsensors; i++) { + SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), + SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), + OID_AUTO, sc->sensors[i].name, + CTLTYPE_INT | CTLFLAG_RD, + dev, sc->sensors[i].id, axp8xx_sysctl, + sc->sensors[i].format, + sc->sensors[i].desc); + } 
+ + /* Get thresholds */ + if (axp8xx_read(dev, AXP_BAT_CAP_WARN, &val, 1) == 0) { + sc->warn_thres = (val & AXP_BAT_CAP_WARN_LV1) >> 4; + sc->shut_thres = (val & AXP_BAT_CAP_WARN_LV2); + if (bootverbose) { + device_printf(dev, + "Raw reg val: 0x%02x\n", val); + device_printf(dev, + "Warning threshold: 0x%02x\n", sc->warn_thres); + device_printf(dev, + "Shutdown threshold: 0x%02x\n", sc->shut_thres); + } + } + /* Enable interrupts */ axp8xx_write(dev, AXP_IRQEN1, AXP_IRQEN1_VBUS_LO | From 74a083d6c718bf7f7eacf4a83578dcb3dd3de5bb Mon Sep 17 00:00:00 2001 From: Michael Tuexen Date: Mon, 11 Feb 2019 15:38:05 +0000 Subject: [PATCH 31/93] Fix flags used when compiling kern_kcov.c and subr_coverage.c. Without this fix, the usage of kernel coverage would lockup the system. Thanks to Andrew for suggesting the final form of the fix. PR: 235611 Reviewed by: andrew@, emaste@ Differential Revision: https://reviews.freebsd.org/D19135 --- sys/conf/files | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sys/conf/files b/sys/conf/files index d3c2c032413..74cabfb486f 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -3808,7 +3808,7 @@ kern/kern_idle.c standard kern/kern_intr.c standard kern/kern_jail.c standard kern/kern_kcov.c optional kcov \ - compile-with "${NORMAL_C} -fno-sanitize=all" + compile-with "${NORMAL_C:N-fsanitize*}" kern/kern_khelp.c standard kern/kern_kthread.c standard kern/kern_ktr.c optional ktr @@ -3884,7 +3884,7 @@ kern/subr_clock.c standard kern/subr_compressor.c standard \ compile-with "${NORMAL_C} -I$S/contrib/zstd/lib/freebsd" kern/subr_coverage.c optional coverage \ - compile-with "${NORMAL_C} -fno-sanitize=all" + compile-with "${NORMAL_C:N-fsanitize*}" kern/subr_counter.c standard kern/subr_devstat.c standard kern/subr_disk.c standard From aa255a10b02e74bd805356e96122fc030c44a793 Mon Sep 17 00:00:00 2001 From: Martin Cracauer Date: Mon, 11 Feb 2019 15:51:28 +0000 Subject: [PATCH 32/93] Clarify NFSv4 /etc/exports semantics, with 
working example. The existing wording has been confusing users for years. --- usr.sbin/mountd/exports.5 | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/usr.sbin/mountd/exports.5 b/usr.sbin/mountd/exports.5 index c5537e978a3..e6f6e2d75ad 100644 --- a/usr.sbin/mountd/exports.5 +++ b/usr.sbin/mountd/exports.5 @@ -498,6 +498,40 @@ and any client within the 131.104.48 subnet is permitted to perform NFSv4 state operations on the server, so long as valid Kerberos credentials are provided. The machine grumpy.cis.uoguelph.ca is permitted to perform NFSv4 state operations on the server using AUTH_SYS credentials, as well as Kerberos ones. +.Pp +In the following example some directories are exported as NFSv3 and NFSv4: +.Bd -literal -offset indent +V4: /wingsdl/nfsv4 +/wingsdl/nfsv4/usr-ports -maproot=root -network 172.16.0.0 -mask 255.255.0.0 +/wingsdl/nfsv4/clasper -maproot=root clasper +.Ed +.Pp +Only one V4: line is needed or allowed to declare where NFSv4 is +rooted. The other lines declare specific exported directories with +their absolute paths given in /etc/exports. +.Pp +The exported directories' paths are used for both v3 and v4. +However, they are interpreted differently for v3 and v4. A client +mount command for usr-ports would use the server-absolute name when +using nfsv3: +.Bd -literal -offset indent +mount server:/wingsdl/nfsv4/usr-ports /mnt/tmp +.Ed +.Pp +A mount command using NFSv4 would use the path relative to the NFSv4 +root: +.Bd -literal -offset indent +mount server:/usr-ports /mnt/tmp +.Ed +.Pp +This also differentiates which version you want if the client can do +both v3 and v4. The former will only ever do a v3 mount and the +latter will only ever do a v4 mount. 
+.Pp +Note that due to different mount behavior between NFSv3 and NFSv4 an +NFSv4 mount request for a directory that the client does not have +permission for will succeed and read/write access will fail +afterwards, whereas NFSv3 rejects the mount request. .Sh SEE ALSO .Xr nfsv4 4 , .Xr netgroup 5 , From bc235bb54d22b06a7896963800ed70a30754a56d Mon Sep 17 00:00:00 2001 From: Martin Cracauer Date: Mon, 11 Feb 2019 16:31:15 +0000 Subject: [PATCH 33/93] Bump .Dd for today's edit. Thank you Enji Cooper --- usr.sbin/mountd/exports.5 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/usr.sbin/mountd/exports.5 b/usr.sbin/mountd/exports.5 index e6f6e2d75ad..1fe5548cda8 100644 --- a/usr.sbin/mountd/exports.5 +++ b/usr.sbin/mountd/exports.5 @@ -28,7 +28,7 @@ .\" @(#)exports.5 8.3 (Berkeley) 3/29/95 .\" $FreeBSD$ .\" -.Dd May 20, 2017 +.Dd February 11, 2019 .Dt EXPORTS 5 .Os .Sh NAME From 804a6541db5fe6a54f653480131e33947ba2b1d1 Mon Sep 17 00:00:00 2001 From: "Andrey V. Elsukov" Date: Mon, 11 Feb 2019 18:10:55 +0000 Subject: [PATCH 34/93] Remove `set' field from state structure and use set from parent rule. Initially it was introduced because parent rule pointer could be freed, and rule's information could become inaccessible. In r341471 this was changed. And now we don't need this information, and also it can become stale. E.g. rule can be moved from one set to another. This can lead to a mismatch between the parent's set and the state's set. In this case it is possible that static rule will be freed, but dynamic state will not. This can happen when `ipfw delete set N` command is used to delete rules that were moved to another set. To fix the problem we will use the set number from parent rule.
Obtained from: Yandex LLC MFC after: 1 week Sponsored by: Yandex LLC --- sys/netpfil/ipfw/ip_fw_dynamic.c | 129 ++++++++++++++++--------------- 1 file changed, 68 insertions(+), 61 deletions(-) diff --git a/sys/netpfil/ipfw/ip_fw_dynamic.c b/sys/netpfil/ipfw/ip_fw_dynamic.c index 473db72454d..76db67e567f 100644 --- a/sys/netpfil/ipfw/ip_fw_dynamic.c +++ b/sys/netpfil/ipfw/ip_fw_dynamic.c @@ -134,9 +134,8 @@ struct dyn_data { uint32_t hashval; /* hash value used for hash resize */ uint16_t fibnum; /* fib used to send keepalives */ - uint8_t _pad[2]; + uint8_t _pad[3]; uint8_t flags; /* internal flags */ - uint8_t set; /* parent rule set number */ uint16_t rulenum; /* parent rule number */ uint32_t ruleid; /* parent rule id */ @@ -162,8 +161,7 @@ struct dyn_data { struct dyn_parent { void *parent; /* pointer to parent rule */ uint32_t count; /* number of linked states */ - uint8_t _pad; - uint8_t set; /* parent rule set number */ + uint8_t _pad[2]; uint16_t rulenum; /* parent rule number */ uint32_t ruleid; /* parent rule id */ uint32_t hashval; /* hash value used for hash resize */ @@ -506,7 +504,7 @@ static int dyn_lookup_ipv6_state_locked(const struct ipfw_flow_id *, uint32_t, const void *, int, uint32_t, uint16_t); static struct dyn_ipv6_state *dyn_alloc_ipv6_state( const struct ipfw_flow_id *, uint32_t, uint16_t, uint8_t); -static int dyn_add_ipv6_state(void *, uint32_t, uint16_t, uint8_t, +static int dyn_add_ipv6_state(void *, uint32_t, uint16_t, const struct ipfw_flow_id *, uint32_t, const void *, int, uint32_t, struct ipfw_dyn_info *, uint16_t, uint16_t, uint8_t); static void dyn_export_ipv6_state(const struct dyn_ipv6_state *, @@ -527,8 +525,7 @@ static struct dyn_ipv6_state *dyn_lookup_ipv6_parent_locked( const struct ipfw_flow_id *, uint32_t, const void *, uint32_t, uint16_t, uint32_t); static struct dyn_ipv6_state *dyn_add_ipv6_parent(void *, uint32_t, uint16_t, - uint8_t, const struct ipfw_flow_id *, uint32_t, uint32_t, uint32_t, - uint16_t); + const 
struct ipfw_flow_id *, uint32_t, uint32_t, uint32_t, uint16_t); #endif /* INET6 */ /* Functions to work with limit states */ @@ -539,17 +536,17 @@ static struct dyn_ipv4_state *dyn_lookup_ipv4_parent( static struct dyn_ipv4_state *dyn_lookup_ipv4_parent_locked( const struct ipfw_flow_id *, const void *, uint32_t, uint16_t, uint32_t); static struct dyn_parent *dyn_alloc_parent(void *, uint32_t, uint16_t, - uint8_t, uint32_t); + uint32_t); static struct dyn_ipv4_state *dyn_add_ipv4_parent(void *, uint32_t, uint16_t, - uint8_t, const struct ipfw_flow_id *, uint32_t, uint32_t, uint16_t); + const struct ipfw_flow_id *, uint32_t, uint32_t, uint16_t); static void dyn_tick(void *); static void dyn_expire_states(struct ip_fw_chain *, ipfw_range_tlv *); static void dyn_free_states(struct ip_fw_chain *); -static void dyn_export_parent(const struct dyn_parent *, uint16_t, +static void dyn_export_parent(const struct dyn_parent *, uint16_t, uint8_t, ipfw_dyn_rule *); static void dyn_export_data(const struct dyn_data *, uint16_t, uint8_t, - ipfw_dyn_rule *); + uint8_t, ipfw_dyn_rule *); static uint32_t dyn_update_tcp_state(struct dyn_data *, const struct ipfw_flow_id *, const struct tcphdr *, int); static void dyn_update_proto_state(struct dyn_data *, @@ -562,7 +559,7 @@ static int dyn_lookup_ipv4_state_locked(const struct ipfw_flow_id *, const void *, int, uint32_t, uint16_t); static struct dyn_ipv4_state *dyn_alloc_ipv4_state( const struct ipfw_flow_id *, uint16_t, uint8_t); -static int dyn_add_ipv4_state(void *, uint32_t, uint16_t, uint8_t, +static int dyn_add_ipv4_state(void *, uint32_t, uint16_t, const struct ipfw_flow_id *, const void *, int, uint32_t, struct ipfw_dyn_info *, uint16_t, uint16_t, uint8_t); static void dyn_export_ipv4_state(const struct dyn_ipv4_state *, @@ -1459,7 +1456,7 @@ ipfw_dyn_lookup_state(const struct ip_fw_args *args, const void *ulp, static struct dyn_parent * dyn_alloc_parent(void *parent, uint32_t ruleid, uint16_t rulenum, - uint8_t set, uint32_t 
hashval) + uint32_t hashval) { struct dyn_parent *limit; @@ -1478,7 +1475,6 @@ dyn_alloc_parent(void *parent, uint32_t ruleid, uint16_t rulenum, limit->parent = parent; limit->ruleid = ruleid; limit->rulenum = rulenum; - limit->set = set; limit->hashval = hashval; limit->expire = time_uptime + V_dyn_short_lifetime; return (limit); @@ -1486,7 +1482,7 @@ dyn_alloc_parent(void *parent, uint32_t ruleid, uint16_t rulenum, static struct dyn_data * dyn_alloc_dyndata(void *parent, uint32_t ruleid, uint16_t rulenum, - uint8_t set, const struct ipfw_flow_id *pkt, const void *ulp, int pktlen, + const struct ipfw_flow_id *pkt, const void *ulp, int pktlen, uint32_t hashval, uint16_t fibnum) { struct dyn_data *data; @@ -1505,7 +1501,6 @@ dyn_alloc_dyndata(void *parent, uint32_t ruleid, uint16_t rulenum, data->parent = parent; data->ruleid = ruleid; data->rulenum = rulenum; - data->set = set; data->fibnum = fibnum; data->hashval = hashval; data->expire = time_uptime + V_dyn_syn_lifetime; @@ -1542,8 +1537,8 @@ dyn_alloc_ipv4_state(const struct ipfw_flow_id *pkt, uint16_t kidx, */ static struct dyn_ipv4_state * dyn_add_ipv4_parent(void *rule, uint32_t ruleid, uint16_t rulenum, - uint8_t set, const struct ipfw_flow_id *pkt, uint32_t hashval, - uint32_t version, uint16_t kidx) + const struct ipfw_flow_id *pkt, uint32_t hashval, uint32_t version, + uint16_t kidx) { struct dyn_ipv4_state *s; struct dyn_parent *limit; @@ -1570,7 +1565,7 @@ dyn_add_ipv4_parent(void *rule, uint32_t ruleid, uint16_t rulenum, } } - limit = dyn_alloc_parent(rule, ruleid, rulenum, set, hashval); + limit = dyn_alloc_parent(rule, ruleid, rulenum, hashval); if (limit == NULL) { DYN_BUCKET_UNLOCK(bucket); return (NULL); @@ -1595,7 +1590,7 @@ dyn_add_ipv4_parent(void *rule, uint32_t ruleid, uint16_t rulenum, static int dyn_add_ipv4_state(void *parent, uint32_t ruleid, uint16_t rulenum, - uint8_t set, const struct ipfw_flow_id *pkt, const void *ulp, int pktlen, + const struct ipfw_flow_id *pkt, const void *ulp, int 
pktlen, uint32_t hashval, struct ipfw_dyn_info *info, uint16_t fibnum, uint16_t kidx, uint8_t type) { @@ -1620,7 +1615,7 @@ dyn_add_ipv4_state(void *parent, uint32_t ruleid, uint16_t rulenum, } } - data = dyn_alloc_dyndata(parent, ruleid, rulenum, set, pkt, ulp, + data = dyn_alloc_dyndata(parent, ruleid, rulenum, pkt, ulp, pktlen, hashval, fibnum); if (data == NULL) { DYN_BUCKET_UNLOCK(bucket); @@ -1673,8 +1668,8 @@ dyn_alloc_ipv6_state(const struct ipfw_flow_id *pkt, uint32_t zoneid, */ static struct dyn_ipv6_state * dyn_add_ipv6_parent(void *rule, uint32_t ruleid, uint16_t rulenum, - uint8_t set, const struct ipfw_flow_id *pkt, uint32_t zoneid, - uint32_t hashval, uint32_t version, uint16_t kidx) + const struct ipfw_flow_id *pkt, uint32_t zoneid, uint32_t hashval, + uint32_t version, uint16_t kidx) { struct dyn_ipv6_state *s; struct dyn_parent *limit; @@ -1701,7 +1696,7 @@ dyn_add_ipv6_parent(void *rule, uint32_t ruleid, uint16_t rulenum, } } - limit = dyn_alloc_parent(rule, ruleid, rulenum, set, hashval); + limit = dyn_alloc_parent(rule, ruleid, rulenum, hashval); if (limit == NULL) { DYN_BUCKET_UNLOCK(bucket); return (NULL); @@ -1726,8 +1721,8 @@ dyn_add_ipv6_parent(void *rule, uint32_t ruleid, uint16_t rulenum, static int dyn_add_ipv6_state(void *parent, uint32_t ruleid, uint16_t rulenum, - uint8_t set, const struct ipfw_flow_id *pkt, uint32_t zoneid, - const void *ulp, int pktlen, uint32_t hashval, struct ipfw_dyn_info *info, + const struct ipfw_flow_id *pkt, uint32_t zoneid, const void *ulp, + int pktlen, uint32_t hashval, struct ipfw_dyn_info *info, uint16_t fibnum, uint16_t kidx, uint8_t type) { struct dyn_ipv6_state *s; @@ -1751,7 +1746,7 @@ dyn_add_ipv6_state(void *parent, uint32_t ruleid, uint16_t rulenum, } } - data = dyn_alloc_dyndata(parent, ruleid, rulenum, set, pkt, ulp, + data = dyn_alloc_dyndata(parent, ruleid, rulenum, pkt, ulp, pktlen, hashval, fibnum); if (data == NULL) { DYN_BUCKET_UNLOCK(bucket); @@ -1801,8 +1796,7 @@ 
dyn_get_parent_state(const struct ipfw_flow_id *pkt, uint32_t zoneid, DYNSTATE_CRITICAL_EXIT(); s = dyn_add_ipv4_parent(rule, rule->id, - rule->rulenum, rule->set, pkt, hashval, - version, kidx); + rule->rulenum, pkt, hashval, version, kidx); if (s == NULL) return (NULL); /* Now we are in critical section again. */ @@ -1825,8 +1819,8 @@ dyn_get_parent_state(const struct ipfw_flow_id *pkt, uint32_t zoneid, DYNSTATE_CRITICAL_EXIT(); s = dyn_add_ipv6_parent(rule, rule->id, - rule->rulenum, rule->set, pkt, zoneid, hashval, - version, kidx); + rule->rulenum, pkt, zoneid, hashval, version, + kidx); if (s == NULL) return (NULL); /* Now we are in critical section again. */ @@ -1869,8 +1863,7 @@ dyn_get_parent_state(const struct ipfw_flow_id *pkt, uint32_t zoneid, static int dyn_install_state(const struct ipfw_flow_id *pkt, uint32_t zoneid, - uint16_t fibnum, const void *ulp, int pktlen, void *rule, - uint32_t ruleid, uint16_t rulenum, uint8_t set, + uint16_t fibnum, const void *ulp, int pktlen, struct ip_fw *rule, struct ipfw_dyn_info *info, uint32_t limit, uint16_t limit_mask, uint16_t kidx, uint8_t type) { @@ -1934,11 +1927,11 @@ dyn_install_state(const struct ipfw_flow_id *pkt, uint32_t zoneid, hashval = hash_packet(pkt); if (IS_IP4_FLOW_ID(pkt)) - ret = dyn_add_ipv4_state(rule, ruleid, rulenum, set, pkt, + ret = dyn_add_ipv4_state(rule, rule->id, rule->rulenum, pkt, ulp, pktlen, hashval, info, fibnum, kidx, type); #ifdef INET6 else if (IS_IP6_FLOW_ID(pkt)) - ret = dyn_add_ipv6_state(rule, ruleid, rulenum, set, pkt, + ret = dyn_add_ipv6_state(rule, rule->id, rule->rulenum, pkt, zoneid, ulp, pktlen, hashval, info, fibnum, kidx, type); #endif /* INET6 */ else @@ -2011,8 +2004,8 @@ ipfw_dyn_install_state(struct ip_fw_chain *chain, struct ip_fw *rule, #ifdef INET6 IS_IP6_FLOW_ID(&args->f_id) ? 
dyn_getscopeid(args): #endif - 0, M_GETFIB(args->m), ulp, pktlen, rule, rule->id, rule->rulenum, - rule->set, info, limit, limit_mask, cmd->o.arg1, cmd->o.opcode)); + 0, M_GETFIB(args->m), ulp, pktlen, rule, info, limit, + limit_mask, cmd->o.arg1, cmd->o.opcode)); } /* @@ -2197,17 +2190,19 @@ dyn_match_ipv4_state(struct ip_fw_chain *ch, struct dyn_ipv4_state *s, struct ip_fw *rule; int ret; - if (s->type == O_LIMIT_PARENT) - return (dyn_match_range(s->limit->rulenum, - s->limit->set, rt)); - - ret = dyn_match_range(s->data->rulenum, s->data->set, rt); - if (ret == 0 || V_dyn_keep_states == 0 || ret > 1) - return (ret); + if (s->type == O_LIMIT_PARENT) { + rule = s->limit->parent; + return (dyn_match_range(s->limit->rulenum, rule->set, rt)); + } rule = s->data->parent; if (s->type == O_LIMIT) rule = ((struct dyn_ipv4_state *)rule)->limit->parent; + + ret = dyn_match_range(s->data->rulenum, rule->set, rt); + if (ret == 0 || V_dyn_keep_states == 0 || ret > 1) + return (ret); + dyn_acquire_rule(ch, s->data, rule, s->kidx); return (0); } @@ -2220,17 +2215,19 @@ dyn_match_ipv6_state(struct ip_fw_chain *ch, struct dyn_ipv6_state *s, struct ip_fw *rule; int ret; - if (s->type == O_LIMIT_PARENT) - return (dyn_match_range(s->limit->rulenum, - s->limit->set, rt)); - - ret = dyn_match_range(s->data->rulenum, s->data->set, rt); - if (ret == 0 || V_dyn_keep_states == 0 || ret > 1) - return (ret); + if (s->type == O_LIMIT_PARENT) { + rule = s->limit->parent; + return (dyn_match_range(s->limit->rulenum, rule->set, rt)); + } rule = s->data->parent; if (s->type == O_LIMIT) rule = ((struct dyn_ipv6_state *)rule)->limit->parent; + + ret = dyn_match_range(s->data->rulenum, rule->set, rt); + if (ret == 0 || V_dyn_keep_states == 0 || ret > 1) + return (ret); + dyn_acquire_rule(ch, s->data, rule, s->kidx); return (0); } @@ -2898,7 +2895,7 @@ ipfw_is_dyn_rule(struct ip_fw *rule) } static void -dyn_export_parent(const struct dyn_parent *p, uint16_t kidx, +dyn_export_parent(const struct 
dyn_parent *p, uint16_t kidx, uint8_t set, ipfw_dyn_rule *dst) { @@ -2910,9 +2907,9 @@ dyn_export_parent(const struct dyn_parent *p, uint16_t kidx, /* 'rule' is used to pass up the rule number and set */ memcpy(&dst->rule, &p->rulenum, sizeof(p->rulenum)); + /* store set number into high word of dst->rule pointer. */ - memcpy((char *)&dst->rule + sizeof(p->rulenum), &p->set, - sizeof(p->set)); + memcpy((char *)&dst->rule + sizeof(p->rulenum), &set, sizeof(set)); /* unused fields */ dst->pcnt = 0; @@ -2931,7 +2928,7 @@ dyn_export_parent(const struct dyn_parent *p, uint16_t kidx, static void dyn_export_data(const struct dyn_data *data, uint16_t kidx, uint8_t type, - ipfw_dyn_rule *dst) + uint8_t set, ipfw_dyn_rule *dst) { dst->dyn_type = type; @@ -2943,9 +2940,9 @@ dyn_export_data(const struct dyn_data *data, uint16_t kidx, uint8_t type, /* 'rule' is used to pass up the rule number and set */ memcpy(&dst->rule, &data->rulenum, sizeof(data->rulenum)); + /* store set number into high word of dst->rule pointer. 
*/ - memcpy((char *)&dst->rule + sizeof(data->rulenum), &data->set, - sizeof(data->set)); + memcpy((char *)&dst->rule + sizeof(data->rulenum), &set, sizeof(set)); dst->state = data->state; if (data->flags & DYN_REFERENCED) @@ -2967,13 +2964,18 @@ dyn_export_data(const struct dyn_data *data, uint16_t kidx, uint8_t type, static void dyn_export_ipv4_state(const struct dyn_ipv4_state *s, ipfw_dyn_rule *dst) { + struct ip_fw *rule; switch (s->type) { case O_LIMIT_PARENT: - dyn_export_parent(s->limit, s->kidx, dst); + rule = s->limit->parent; + dyn_export_parent(s->limit, s->kidx, rule->set, dst); break; default: - dyn_export_data(s->data, s->kidx, s->type, dst); + rule = s->data->parent; + if (s->type == O_LIMIT) + rule = ((struct dyn_ipv4_state *)rule)->limit->parent; + dyn_export_data(s->data, s->kidx, s->type, rule->set, dst); } dst->id.dst_ip = s->dst; @@ -2994,13 +2996,18 @@ dyn_export_ipv4_state(const struct dyn_ipv4_state *s, ipfw_dyn_rule *dst) static void dyn_export_ipv6_state(const struct dyn_ipv6_state *s, ipfw_dyn_rule *dst) { + struct ip_fw *rule; switch (s->type) { case O_LIMIT_PARENT: - dyn_export_parent(s->limit, s->kidx, dst); + rule = s->limit->parent; + dyn_export_parent(s->limit, s->kidx, rule->set, dst); break; default: - dyn_export_data(s->data, s->kidx, s->type, dst); + rule = s->data->parent; + if (s->type == O_LIMIT) + rule = ((struct dyn_ipv6_state *)rule)->limit->parent; + dyn_export_data(s->data, s->kidx, s->type, rule->set, dst); } dst->id.src_ip6 = s->src; From 22427daf7e84816c26941fb2391da913b0a6ecb7 Mon Sep 17 00:00:00 2001 From: Edward Tomasz Napierala Date: Mon, 11 Feb 2019 20:46:32 +0000 Subject: [PATCH 35/93] Add explanation of branches to the ports(7) man page. 
Reviewed by: matthew@, freebsd@mhka.no MFC after: 2 weeks Sponsored by: DARPA, AFRL Differential Revision: https://reviews.freebsd.org/D19146 --- share/man/man7/ports.7 | 53 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 50 insertions(+), 3 deletions(-) diff --git a/share/man/man7/ports.7 b/share/man/man7/ports.7 index 0f9bc2e6f99..d1df91fd337 100644 --- a/share/man/man7/ports.7 +++ b/share/man/man7/ports.7 @@ -25,7 +25,7 @@ .\" .\" $FreeBSD$ .\" -.Dd January 8, 2019 +.Dd February 11, 2019 .Dt PORTS 7 .Os .Sh NAME @@ -38,8 +38,6 @@ Ports Collection offers a simple way to compile and install third party applications. It is also used to build packages, to be installed using .Xr pkg 8 . -It can be installed and updated using -.Xr portsnap 8 . .Pp The ports tree, typically located at .Pa /usr/ports , @@ -62,6 +60,55 @@ Afterwards, .Dq Li "make install" installs the application. .Pp +The +.Fx +Ports Collection is maintained in several branches, which differ mostly +by versions of software provided: the +.Em head +branch contains all the latest changes, while the +.Em quarterly +branches only provide critical fixes. +The +.Em head +branch can be installed or updated using either +.Xr portsnap 8 , +or from the Subversion repository at: +.Pp +.Lk https://svn.FreeBSD.org/ports/head +.Pp +The +.Em quarterly +branches can be found in Subversion in the +.Fa branches/ +subdirectory, e.g.: +.Pp +.Lk https://svn.FreeBSD.org/ports/branches/2019Q1 +.Pp +It is generally a good idea to use the +.Nm +branch that matches the +.Xr pkg 8 +repository being used. +By default, for +.Fx CURRENT +the +.Xr pkg 8 +is configured to install packages built from the +.Em head +branch, while for +.Fx STABLE +or RELEASE versions it is configured to install packages built from +the latest +.Em quarterly +branch. +The currently configured +.Xr pkg 8 +repository can be verified by looking at the +.Em url +field in +.Cm pkg -vv +output.
+.Pp For more information about using ports, see the .Dq "Packages and Ports" section in From 967b2dce026f8430e2c23c1b6a927f42a46ec84d Mon Sep 17 00:00:00 2001 From: John Baldwin Date: Mon, 11 Feb 2019 20:47:09 +0000 Subject: [PATCH 36/93] Enable PCI BAR reallocation by default. When pci_realloc_bars was first added, the intention was to eventually enable it by default, but it was left disabled to preserve existing behavior. The setting is pretty conservative in that it does not attempt to allocate resources for BARs that the BIOS/firmware leaves disabled. It only attempts to reallocate resources for a BAR that the firmware programmed during boot but that conflicts with another resource during the kernel's device scan. PR 221350 is an example of a machine that this knob fixes. Reviewed by: imp Differential Revision: https://reviews.freebsd.org/D18965 --- sys/dev/pci/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/dev/pci/pci.c b/sys/dev/pci/pci.c index 5474c8a6614..3e74436e5e5 100644 --- a/sys/dev/pci/pci.c +++ b/sys/dev/pci/pci.c @@ -341,7 +341,7 @@ SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RWTUN, " enable these bits correctly. We'd like to do this all the time, but" " there are some peripherals that this causes problems with."); -static int pci_do_realloc_bars = 0; +static int pci_do_realloc_bars = 1; SYSCTL_INT(_hw_pci, OID_AUTO, realloc_bars, CTLFLAG_RWTUN, &pci_do_realloc_bars, 0, "Attempt to allocate a new range for any BARs whose original " From f95509a489bc55a3aefd0e650509067752a6d279 Mon Sep 17 00:00:00 2001 From: Brooks Davis Date: Mon, 11 Feb 2019 21:31:26 +0000 Subject: [PATCH 37/93] mdmfs: Fix many bugs in automatic md(4) creation. This code allocated a correctly sized buffer, read past the end of the source buffer, writing off the end of the target buffer, and then writing a '\0' terminator past the end of the target buffer (in the wrong place). It then leaked the buffer. 
Switch to a statically sized buffer on the stack and update the source pointer and length before use so the correct things are copied. Fix a logic error in the checks that the format of the line is as expected and move one out of an assert. Remove an unneeded close(). fclose() closes the descriptor. Found with: CheriABI Obtained from: CheriBSD Reviewed by: kib, jhb, markj Differential Revision: https://reviews.freebsd.org/D19122 --- sbin/mdmfs/mdmfs.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/sbin/mdmfs/mdmfs.c b/sbin/mdmfs/mdmfs.c index b85d7dd4cd9..6e4a39611a8 100644 --- a/sbin/mdmfs/mdmfs.c +++ b/sbin/mdmfs/mdmfs.c @@ -444,7 +444,8 @@ static void do_mdconfig_attach_au(const char *args, const enum md_types mdtype) { const char *ta; /* Type arg. */ - char *linep, *linebuf; /* Line pointer, line buffer. */ + char *linep; + char linebuf[12]; /* 32-bit unit (10) + '\n' (1) + '\0' (1) */ int fd; /* Standard output of mdconfig invocation. */ FILE *sfd; int rv; @@ -479,14 +480,15 @@ do_mdconfig_attach_au(const char *args, const enum md_types mdtype) if (sfd == NULL) err(1, "fdopen"); linep = fgetln(sfd, &linelen); - if (linep == NULL && linelen < mdnamelen + 1) - errx(1, "unexpected output from mdconfig (attach)"); /* If the output format changes, we want to know about it. */ - assert(strncmp(linep, mdname, mdnamelen) == 0); - linebuf = malloc(linelen - mdnamelen + 1); - assert(linebuf != NULL); + if (linep == NULL || linelen <= mdnamelen + 1 || + linelen - mdnamelen >= sizeof(linebuf) || + strncmp(linep, mdname, mdnamelen) != 0) + errx(1, "unexpected output from mdconfig (attach)"); + linep += mdnamelen; + linelen -= mdnamelen; /* Can't use strlcpy because linep is not NUL-terminated.
*/ - strncpy(linebuf, linep + mdnamelen, linelen); + strncpy(linebuf, linep, linelen); linebuf[linelen] = '\0'; ul = strtoul(linebuf, &p, 10); if (ul == ULONG_MAX || *p != '\n') @@ -494,7 +496,6 @@ do_mdconfig_attach_au(const char *args, const enum md_types mdtype) unit = ul; fclose(sfd); - close(fd); } /* From 3420c04b44d4568d2eba31feeba2242bf0e87e86 Mon Sep 17 00:00:00 2001 From: David Bright Date: Mon, 11 Feb 2019 22:09:26 +0000 Subject: [PATCH 38/93] CID 1009492: Logically dead code in sys/cam/scsi/scsi_xpt.c In `probedone()`, for the `PROBE_REPORT_LUNS` case, all paths that fall to the bottom of the case set `lp` to `NULL`, so the test for a non-NULL value of `lp` and call to `free()` if true is dead code as the test can never be true. Fix by eliminating the whole if statement. To guard against a possible future change that accidentally violates this assumption, use a `KASSERT()` to catch if `lp` is non-NULL. Reviewed by: cem MFC after: 1 week Sponsored by: Dell EMC Isilon Differential Revision: https://reviews.freebsd.org/D19109 --- sys/cam/scsi/scsi_xpt.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/sys/cam/scsi/scsi_xpt.c b/sys/cam/scsi/scsi_xpt.c index 39eb5782215..66ff056ff2e 100644 --- a/sys/cam/scsi/scsi_xpt.c +++ b/sys/cam/scsi/scsi_xpt.c @@ -1385,6 +1385,12 @@ out: probe_purge_old(path, lp, softc->flags); lp = NULL; } + /* The processing above should either exit via a `goto + * out` or leave the `lp` variable `NULL` and (if + * applicable) `free()` the storage to which it had + * pointed. Assert here that is the case. 
+ */ + KASSERT(lp == NULL, ("%s: lp is not NULL", __func__)); inq_buf = &path->device->inq_data; if (path->device->flags & CAM_DEV_INQUIRY_DATA_VALID && (SID_QUAL(inq_buf) == SID_QUAL_LU_CONNECTED || @@ -1398,9 +1404,6 @@ out: xpt_schedule(periph, priority); goto out; } - if (lp) { - free(lp, M_CAMXPT); - } PROBE_SET_ACTION(softc, PROBE_INVALID); xpt_release_ccb(done_ccb); break; From 997667302f638219587f1bbc55051716a80fe863 Mon Sep 17 00:00:00 2001 From: Patrick Kelsey Date: Mon, 11 Feb 2019 22:58:43 +0000 Subject: [PATCH 39/93] Fix the fix added in r343287 for spurious HFSC bandwidth check errors The logic added in r343287 to avoid false-positive sum-of-child-bandwidth check errors for HFSC queues has a bug in it that causes the upperlimit service curve of an HFSC queue to be pulled down to its parent's linkshare service curve if it happens to be above it. Upon further inspection/reflection, this generic sum-of-child-bandwidths check does not need to be fixed for HFSC - it needs to be skipped. For HFSC, the equivalent check is to ensure the sum of child linkshare service curves are at or below the parent's linkshare service curve, and this check is already being performed by eval_pfqueue_hfsc(). This commit reverts the affected parts of r343287 and adds new logic to skip the generic sum-of-child-bandwidths check for HFSC. MFC after: 1 day Sponsored by: RG Nets Differential Revision: https://reviews.freebsd.org/D19124 --- sbin/pfctl/pfctl_altq.c | 31 +++++++++++++------------------ 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/sbin/pfctl/pfctl_altq.c b/sbin/pfctl/pfctl_altq.c index e5dad956f83..f23fe057f70 100644 --- a/sbin/pfctl/pfctl_altq.c +++ b/sbin/pfctl/pfctl_altq.c @@ -429,34 +429,25 @@ eval_pfqueue(struct pfctl *pf, struct pf_altq *pa, struct node_queue_bw *bw, if (pa->qlimit == 0) pa->qlimit = DEFAULT_QLIMIT; - if (eval_queue_opts(pa, opts, - parent == NULL ? 
pa->ifbandwidth : parent->pa.bandwidth)) - return (1); - if (pa->scheduler == ALTQT_CBQ || pa->scheduler == ALTQT_HFSC || pa->scheduler == ALTQT_FAIRQ) { pa->bandwidth = eval_bwspec(bw, parent == NULL ? pa->ifbandwidth : parent->pa.bandwidth); - /* - * For HFSC, if the linkshare service curve m2 parameter is - * set, it overrides the provided queue bandwidth parameter, - * so adjust the queue bandwidth parameter accordingly here - * to avoid false positives in the total child bandwidth - * check below. - */ - if ((pa->scheduler == ALTQT_HFSC) && - (pa->pq_u.hfsc_opts.lssc_m2 != 0)) { - pa->bandwidth = pa->pq_u.hfsc_opts.lssc_m2; - } - if (pa->bandwidth > pa->ifbandwidth) { fprintf(stderr, "bandwidth for %s higher than " "interface\n", pa->qname); return (1); } - /* check the sum of the child bandwidth is under parent's */ - if (parent != NULL) { + /* + * If not HFSC, then check that the sum of the child + * bandwidths is less than the parent's bandwidth. For + * HFSC, the equivalent concept is to check that the sum of + * the child linkshare service curves are under the parent's + * linkshare service curve, and that check is performed by + * eval_pfqueue_hfsc(). + */ + if ((parent != NULL) && (pa->scheduler != ALTQT_HFSC)) { if (pa->bandwidth > parent->pa.bandwidth) { warnx("bandwidth for %s higher than parent", pa->qname); @@ -472,6 +463,10 @@ eval_pfqueue(struct pfctl *pf, struct pf_altq *pa, struct node_queue_bw *bw, } } + if (eval_queue_opts(pa, opts, + parent == NULL ? 
pa->ifbandwidth : parent->pa.bandwidth)) + return (1); + if (parent != NULL) parent->meta.children++; From 446ae812b088833dedcaeee44d2556a9fb5545d5 Mon Sep 17 00:00:00 2001 From: Kyle Evans Date: Tue, 12 Feb 2019 02:16:21 +0000 Subject: [PATCH 40/93] libbe(3): Belatedly note the BE_DESTROY_ORIGIN option added in r343977 X-MFC-With: r343977 --- lib/libbe/libbe.3 | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/lib/libbe/libbe.3 b/lib/libbe/libbe.3 index 5ee22db811d..ada024374eb 100644 --- a/lib/libbe/libbe.3 +++ b/lib/libbe/libbe.3 @@ -28,7 +28,7 @@ .\" .\" $FreeBSD$ .\" -.Dd November 21, 2018 +.Dd February 11, 2019 .Dt LIBBE 3 .Os .Sh NAME @@ -253,6 +253,13 @@ It will not destroy a mounted boot environment unless the .Dv BE_DESTROY_FORCE option is set in .Fa options . +If the +.Dv BE_DESTROY_ORIGIN +option is set in +.Fa options , +the +.Fn be_destroy +function will destroy the origin snapshot to this boot environment as well. .Pp The .Fn be_nicenum From 65564a5e7696ff46653edca78cff4eb07b229c3d Mon Sep 17 00:00:00 2001 From: Kevin Lo Date: Tue, 12 Feb 2019 02:48:16 +0000 Subject: [PATCH 41/93] Remove duplicate vendor id in r334650. Intenso doesn't have a USB VID. 
--- sys/dev/usb/quirk/usb_quirk.c | 2 +- sys/dev/usb/usbdevs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sys/dev/usb/quirk/usb_quirk.c b/sys/dev/usb/quirk/usb_quirk.c index 1c1d607d8b6..9488ced366c 100644 --- a/sys/dev/usb/quirk/usb_quirk.c +++ b/sys/dev/usb/quirk/usb_quirk.c @@ -273,7 +273,7 @@ static struct usb_quirk_entry usb_quirks[USB_DEV_QUIRKS_MAX] = { UQ_MSC_FORCE_PROTO_RBC), USB_QUIRK(INSYSTEM, STORAGE_V2, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_CBI, UQ_MSC_FORCE_PROTO_RBC), - USB_QUIRK(INTENSO, MEMORY_BOX, 0x0000, 0xffff, UQ_MSC_NO_INQUIRY), + USB_QUIRK(VIALABS, VL701, 0x0000, 0xffff, UQ_MSC_NO_INQUIRY), USB_QUIRK(IODATA, IU_CD2, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI), USB_QUIRK(IODATA, DVR_UEH8, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, diff --git a/sys/dev/usb/usbdevs b/sys/dev/usb/usbdevs index 6a669e83c64..7efcee42664 100644 --- a/sys/dev/usb/usbdevs +++ b/sys/dev/usb/usbdevs @@ -767,7 +767,6 @@ vendor SIMTEC 0x20df Simtec Electronics vendor TRENDNET 0x20f4 TRENDnet vendor RTSYSTEMS 0x2100 RT Systems vendor DLINK4 0x2101 D-Link -vendor INTENSO 0x2109 INTENSO vendor VIALABS 0x2109 VIA Labs vendor ERICSSON 0x2282 Ericsson vendor MOTOROLA2 0x22b8 Motorola @@ -4738,6 +4737,7 @@ product VIA USB2IDEBRIDGE 0x6204 USB 2.0 IDE Bridge /* VIA Labs */ product VIALABS USB30SATABRIDGE 0x0700 USB 3.0 SATA Bridge +product VIALABS VL701 0x0701 VL701 USB 3.0 SATA Bridge /* Vaisala products */ product VAISALA CABLE 0x0200 USB Interface cable From ec637bb957586417c62fca3dabc8861042833d94 Mon Sep 17 00:00:00 2001 From: Kevin Lo Date: Tue, 12 Feb 2019 02:55:25 +0000 Subject: [PATCH 42/93] Remove entry for Intenso product. 
--- sys/dev/usb/usbdevs | 3 --- 1 file changed, 3 deletions(-) diff --git a/sys/dev/usb/usbdevs b/sys/dev/usb/usbdevs index 7efcee42664..fb89aee066e 100644 --- a/sys/dev/usb/usbdevs +++ b/sys/dev/usb/usbdevs @@ -2571,9 +2571,6 @@ product INSYSTEM ISD105 0x0202 IDE Adapter ISD105 product INSYSTEM USBCABLE 0x081a USB cable product INSYSTEM STORAGE_V2 0x5701 USB Storage Adapter V2 -/* Intenso products */ -product INTENSO MEMORY_BOX 0x0701 External disk - /* Intel products */ product INTEL EASYPC_CAMERA 0x0110 Easy PC Camera product INTEL TESTBOARD 0x9890 82930 test board From 7bc2a58ea65b17d2db4be731ad8dbfa79be68e6d Mon Sep 17 00:00:00 2001 From: Enji Cooper Date: Tue, 12 Feb 2019 03:32:40 +0000 Subject: [PATCH 43/93] Bump `__FreeBSD_version__` for r343891 This will allow upstream consumers, e.g., capsicum-test and third-party packages (via ports(7)), to test for a specific `__FreeBSD_version__` and expect `renameat(2)` to be functional. PR: 222258 Approved by: emaste (mentor) Reviewed by: emaste MFC with: r343891 Differential Revision: https://reviews.freebsd.org/D19154 --- sys/sys/param.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/sys/param.h b/sys/sys/param.h index 07b113f2747..d4bc7fe0c2c 100644 --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -60,7 +60,7 @@ * in the range 5 to 9. */ #undef __FreeBSD_version -#define __FreeBSD_version 1300010 /* Master, propagated to newvers */ +#define __FreeBSD_version 1300011 /* Master, propagated to newvers */ /* * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD, From 6929b7d1abfaea09036fc719b9778b4d0930ac8d Mon Sep 17 00:00:00 2001 From: "Pedro F. Giffuni" Date: Tue, 12 Feb 2019 04:33:05 +0000 Subject: [PATCH 44/93] UMA: unsign some variables related to allocation in hash_alloc(). As a followup to r343673, unsign some variables related to allocation since the hashsize cannot be negative. This gives a bit more space to handle bigger allocations and avoid some implicit casting. 
While here also unsign uh_hashmask, it makes little sense to keep that signed. MFC after: 2 weeks Differential Revision: https://reviews.freebsd.org/D19148 --- sys/vm/uma_core.c | 14 +++++++------- sys/vm/uma_int.h | 6 +++--- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/sys/vm/uma_core.c b/sys/vm/uma_core.c index 6be15c47c1b..b1f7016281d 100644 --- a/sys/vm/uma_core.c +++ b/sys/vm/uma_core.c @@ -622,7 +622,7 @@ zone_timeout(uma_zone_t zone) static int hash_alloc(struct uma_hash *hash) { - int oldsize; + u_int oldsize; size_t alloc; oldsize = hash->uh_hashsize; @@ -666,8 +666,8 @@ static int hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash) { uma_slab_t slab; - int hval; - int i; + u_int hval; + u_int idx; if (!newhash->uh_slab_hash) return (0); @@ -680,10 +680,10 @@ hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash) * full rehash. */ - for (i = 0; i < oldhash->uh_hashsize; i++) - while (!SLIST_EMPTY(&oldhash->uh_slab_hash[i])) { - slab = SLIST_FIRST(&oldhash->uh_slab_hash[i]); - SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[i], us_hlink); + for (idx = 0; idx < oldhash->uh_hashsize; idx++) + while (!SLIST_EMPTY(&oldhash->uh_slab_hash[idx])) { + slab = SLIST_FIRST(&oldhash->uh_slab_hash[idx]); + SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[idx], us_hlink); hval = UMA_HASH(newhash, slab->us_data); SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval], slab, us_hlink); diff --git a/sys/vm/uma_int.h b/sys/vm/uma_int.h index 557becfcca7..0cf965d0533 100644 --- a/sys/vm/uma_int.h +++ b/sys/vm/uma_int.h @@ -179,8 +179,8 @@ SLIST_HEAD(slabhead, uma_slab); struct uma_hash { struct slabhead *uh_slab_hash; /* Hash table for slabs */ - int uh_hashsize; /* Current size of the hash table */ - int uh_hashmask; /* Mask used during hashing */ + u_int uh_hashsize; /* Current size of the hash table */ + u_int uh_hashmask; /* Mask used during hashing */ }; /* @@ -453,7 +453,7 @@ static __inline uma_slab_t hash_sfind(struct uma_hash *hash, uint8_t *data) { 
uma_slab_t slab; - int hval; + u_int hval; hval = UMA_HASH(hash, data); From 2d8cb2f494750935b1126244f9a88bb8eb1e80a0 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Tue, 12 Feb 2019 05:15:36 +0000 Subject: [PATCH 45/93] termcap: Add an entry for kitty The project is here: https://github.com/kovidgoyal/kitty/ I created a port (which now needs updating): https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=233010 If only we could use terminfo :( MFC after: 5 days Approved by: bapt Differential Revision: https://reviews.freebsd.org/D19060 --- share/termcap/termcap | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/share/termcap/termcap b/share/termcap/termcap index 763e6a6bef7..46044da03c5 100644 --- a/share/termcap/termcap +++ b/share/termcap/termcap @@ -4746,6 +4746,29 @@ st-meta-256color|simpleterm with meta key and 256 colors:\ :is=\E[4l\E>\E[?1034h:mm=\E[?1034h:mo=\E[?1034l:\ :rs=\E[4l\E>\E[?1034h:tc=st-256color: + +# From version 0.13.3 +xterm-kitty|KovId's TTY:\ + :tc=xterm-256color:tc=kitty+common: + +kitty+common|KovId's TTY common properties:\ + :am:hs:km:mi:ms:xn:\ + :co#80:it#8:li#24:\ + :AL=\E[%dL:DC=\E[%dP:DL=\E[%dM:DO=\E[%dB:IC=\E[%d@:K1=:K3=:\ + :K4=:K5=:LE=\E[%dD:RI=\E[%dC:SF=\E[%dS:SR=\E[%dT:UP=\E[%dA:\ + :ae=\E(B:al=\E[L:as=\E(0:bl=^G:bt=\E[Z:cd=\E[J:ce=\E[K:\ + :cl=\E[H\E[2J:cm=\E[%i%d;%dH:cr=\r:cs=\E[%i%d;%dr:\ + :ct=\E[3g:dc=\E[P:dl=\E[M:do=\n:ds=\E]2;\007:ec=\E[%dX:\ + :ei=\E[4l:fs=^G:ho=\E[H:im=\E[4h:k1=\EOP:k2=\EOQ:k3=\EOR:\ + :k4=\EOS:k5=\E[15~:k6=\E[17~:k7=\E[18~:k8=\E[19~:\ + :k9=\E[20~:kD=\E[3~:kI=\E[2~:kN=\E[6~:kP=\E[5~:kb=\177:\ + :kd=\EOB:ke=\E[?1l:kh=\EOH:kl=\EOD:kr=\EOC:ks=\E[?1h:\ + :ku=\EOA:le=^H:md=\E[1m:me=\E[0m:mh=\E[2m:mr=\E[7m:nd=\E[C:\ + :rc=\E8:sc=\E7:se=\E[27m:sf=\n:so=\E[7m:sr=\EM:st=\EH:ta=^I:\ + :te=\E[?1049l:ti=\E[?1049h:ts=\E]2;:ue=\E[24m:up=\E[A:\ + :us=\E[4m:vb=\E[?5h\E[?5l:ve=\E[?12l\E[?25h:vi=\E[?25l:\ + :vs=\E[?12;25h: + # # END OF TERMCAP # ------------------------ From 
4f3128086b601d527e9cca520bdd95ce51a2cac7 Mon Sep 17 00:00:00 2001 From: Li-Wen Hsu Date: Tue, 12 Feb 2019 08:16:05 +0000 Subject: [PATCH 46/93] Remove empty files Approved by: markj (mentor) Sponsored by: The FreeBSD Foundation --- sys/contrib/dev/iwm/iwm-3160-9.fw.uu | 0 sys/contrib/dev/iwm/iwm-7260-9.fw.uu | 0 sys/contrib/dev/iwm/iwm-7265-9.fw.uu | 0 3 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 sys/contrib/dev/iwm/iwm-3160-9.fw.uu delete mode 100644 sys/contrib/dev/iwm/iwm-7260-9.fw.uu delete mode 100644 sys/contrib/dev/iwm/iwm-7265-9.fw.uu diff --git a/sys/contrib/dev/iwm/iwm-3160-9.fw.uu b/sys/contrib/dev/iwm/iwm-3160-9.fw.uu deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/sys/contrib/dev/iwm/iwm-7260-9.fw.uu b/sys/contrib/dev/iwm/iwm-7260-9.fw.uu deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/sys/contrib/dev/iwm/iwm-7265-9.fw.uu b/sys/contrib/dev/iwm/iwm-7265-9.fw.uu deleted file mode 100644 index e69de29bb2d..00000000000 From aef0641755fbc434b8f9deef090dc02b7e4fe949 Mon Sep 17 00:00:00 2001 From: Michael Tuexen Date: Tue, 12 Feb 2019 10:17:21 +0000 Subject: [PATCH 47/93] Improve input validation for raw IPv4 socket using the IP_HDRINCL option. This issue was found by running syzkaller on OpenBSD. Greg Steuck made me aware that the problem might also exist on FreeBSD. Reported by: Greg Steuck MFC after: 1 month Differential Revision: https://reviews.freebsd.org/D18834 --- sys/netinet/raw_ip.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c index aa9d3d95356..aa972614e52 100644 --- a/sys/netinet/raw_ip.c +++ b/sys/netinet/raw_ip.c @@ -454,6 +454,8 @@ rip_output(struct mbuf *m, struct socket *so, ...) u_long dst; int flags = ((so->so_options & SO_DONTROUTE) ? 
IP_ROUTETOIF : 0) | IP_ALLOWBROADCAST; + int cnt; + u_char opttype, optlen, *cp; va_start(ap, so); dst = va_arg(ap, u_long); @@ -528,6 +530,34 @@ rip_output(struct mbuf *m, struct socket *so, ...) m_freem(m); return (EINVAL); } + /* + * Don't allow IP options which do not have the required + * structure as specified in section 3.1 of RFC 791 on + * pages 15-23. + */ + cp = (u_char *)(ip + 1); + cnt = (ip->ip_hl << 2) - sizeof (struct ip); + for (; cnt > 0; cnt -= optlen, cp += optlen) { + opttype = cp[IPOPT_OPTVAL]; + if (opttype == IPOPT_EOL) + break; + if (opttype == IPOPT_NOP) { + optlen = 1; + continue; + } + if (cnt < IPOPT_OLEN + sizeof(u_char)) { + INP_RUNLOCK(inp); + m_freem(m); + return (EINVAL); + } + optlen = cp[IPOPT_OLEN]; + if (optlen < IPOPT_OLEN + sizeof(u_char) || + optlen > cnt) { + INP_RUNLOCK(inp); + m_freem(m); + return (EINVAL); + } + } /* * This doesn't allow application to specify ID of zero, * but we got this limitation from the beginning of history. From b8efbfb9d3ea68e540e15d6ef5b10c0ab03a3e8b Mon Sep 17 00:00:00 2001 From: Leandro Lupori Date: Tue, 12 Feb 2019 11:29:03 +0000 Subject: [PATCH 48/93] [ppc64] prevent infinite loop on icache sync At moea64_sync_icache(), when the 'va' argument has page size alignment, round_page() will return the same value as 'va'. This would cause 'len' to be 0 and thus an infinite loop. With this change, 'lim' will always point to the next page boundary. This issue occurred especially during debugging sessions, when a breakpoint was placed on an exact page-aligned offset, for instance. 
Reviewed by: jhibbits Differential Revision: https://reviews.freebsd.org/D19149 --- sys/powerpc/aim/mmu_oea64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/powerpc/aim/mmu_oea64.c b/sys/powerpc/aim/mmu_oea64.c index e116bac5bf1..85a0814a887 100644 --- a/sys/powerpc/aim/mmu_oea64.c +++ b/sys/powerpc/aim/mmu_oea64.c @@ -2807,7 +2807,7 @@ moea64_sync_icache(mmu_t mmu, pmap_t pm, vm_offset_t va, vm_size_t sz) PMAP_LOCK(pm); while (sz > 0) { - lim = round_page(va); + lim = round_page(va+1); len = MIN(lim - va, sz); pvo = moea64_pvo_find_va(pm, va & ~ADDR_POFF); if (pvo != NULL && !(pvo->pvo_pte.pa & LPTE_I)) { From 240d69b9b46205c008fe824d4e7cf61969d68b58 Mon Sep 17 00:00:00 2001 From: Edward Tomasz Napierala Date: Tue, 12 Feb 2019 13:01:55 +0000 Subject: [PATCH 49/93] Fix markup - use .Pa for the directory component, not .Fa. Reported by: 0mp MFC after: 2 weeks Sponsored by: DARPA, AFRL --- share/man/man7/ports.7 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/share/man/man7/ports.7 b/share/man/man7/ports.7 index d1df91fd337..51ae3595941 100644 --- a/share/man/man7/ports.7 +++ b/share/man/man7/ports.7 @@ -25,7 +25,7 @@ .\" .\" $FreeBSD$ .\" -.Dd February 11, 2019 +.Dd February 12, 2019 .Dt PORTS 7 .Os .Sh NAME @@ -79,7 +79,7 @@ or from Subversion repository at: The .Em quarterly branches can be found in Subversion in the -.Fa branches/ +.Pa branches/ subdirectory, eg: .Pp .Lk https://svn.FreeBSD.org/ports/branches/2019Q1 From 54553daf6d0da93a3b4b029c66b836e79a71ab6d Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Tue, 12 Feb 2019 18:32:14 +0000 Subject: [PATCH 50/93] Pull in r339734 from upstream llvm trunk (by Eli Friedman): [ARM] Make PerformSHLSimplify add nodes to the DAG worklist correctly. Intentionally excluding nodes from the DAGCombine worklist is likely to lead to weird optimizations and infinite loops, so it's generally a bad idea. 
To avoid the infinite loops, fix DAGCombine to use the isDesirableToCommuteWithShift target hook before performing the transforms in question, and implement the target hook in the ARM backend to disable the transforms in question. Fixes https://bugs.llvm.org/show_bug.cgi?id=38530 . (I don't have a reduced testcase for that bug. But we should have sufficient test coverage for PerformSHLSimplify given that we're not playing weird tricks with the worklist. I can try to bugpoint it if necessary, though.) Differential Revision: https://reviews.llvm.org/D50667 This should fix a possible hang when compiling sys/dev/nxge/if_nxge.c (which exists now only in the stable/11 branch) for arm. --- .../include/llvm/CodeGen/TargetLowering.h | 16 ++++++++----- .../lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 5 ++-- .../Target/AArch64/AArch64ISelLowering.cpp | 4 +++- .../lib/Target/AArch64/AArch64ISelLowering.h | 3 ++- .../llvm/lib/Target/ARM/ARMISelLowering.cpp | 23 ++++++++++++++++--- contrib/llvm/lib/Target/ARM/ARMISelLowering.h | 3 +++ 6 files changed, 41 insertions(+), 13 deletions(-) diff --git a/contrib/llvm/include/llvm/CodeGen/TargetLowering.h b/contrib/llvm/include/llvm/CodeGen/TargetLowering.h index 40540bd6e1f..847da671c42 100644 --- a/contrib/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/contrib/llvm/include/llvm/CodeGen/TargetLowering.h @@ -2935,12 +2935,16 @@ public: /// virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; - /// Return true if it is profitable to move a following shift through this - // node, adjusting any immediate operands as necessary to preserve semantics. - // This transformation may not be desirable if it disrupts a particularly - // auspicious target-specific tree (e.g. bitfield extraction in AArch64). - // By default, it returns true.
- virtual bool isDesirableToCommuteWithShift(const SDNode *N) const { + /// Return true if it is profitable to move this shift by a constant amount + /// though its operand, adjusting any immediate operands as necessary to + /// preserve semantics. This transformation may not be desirable if it + /// disrupts a particularly auspicious target-specific tree (e.g. bitfield + /// extraction in AArch64). By default, it returns true. + /// + /// @param N the shift node + /// @param Level the current DAGCombine legalization level. + virtual bool isDesirableToCommuteWithShift(const SDNode *N, + CombineLevel Level) const { return true; } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index a5c0b775041..5a9a8fba964 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -6191,7 +6191,7 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) { return SDValue(); } - if (!TLI.isDesirableToCommuteWithShift(LHS)) + if (!TLI.isDesirableToCommuteWithShift(N, Level)) return SDValue(); // Fold the constants, shifting the binop RHS by the shift amount. 
@@ -6495,7 +6495,8 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) && N0.getNode()->hasOneUse() && isConstantOrConstantVector(N1, /* No Opaques */ true) && - isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) { + isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) && + TLI.isDesirableToCommuteWithShift(N, Level)) { SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1); SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1); AddToWorklist(Shl0.getNode()); diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index cfc7aa96d31..75b8036779b 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -8496,7 +8496,9 @@ AArch64TargetLowering::getScratchRegisters(CallingConv::ID) const { } bool -AArch64TargetLowering::isDesirableToCommuteWithShift(const SDNode *N) const { +AArch64TargetLowering::isDesirableToCommuteWithShift(const SDNode *N, + CombineLevel Level) const { + N = N->getOperand(0).getNode(); EVT VT = N->getValueType(0); // If N is unsigned bit extraction: ((x >> C) & mask), then do not combine // it with shift to let it be lowered to UBFX. diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h index d783c8a6048..a6d66aeae04 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -363,7 +363,8 @@ public: const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override; /// Returns false if N is a bit extraction pattern of (X >> C) & Mask. 
- bool isDesirableToCommuteWithShift(const SDNode *N) const override; + bool isDesirableToCommuteWithShift(const SDNode *N, + CombineLevel Level) const override; /// Returns true if it is beneficial to convert a load of a constant /// to just the constant itself. diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp index ede276dd91b..a763e29b9f6 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -10407,6 +10407,25 @@ static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, return SDValue(); } +bool +ARMTargetLowering::isDesirableToCommuteWithShift(const SDNode *N, + CombineLevel Level) const { + if (Level == BeforeLegalizeTypes) + return true; + + if (Subtarget->isThumb() && Subtarget->isThumb1Only()) + return true; + + if (N->getOpcode() != ISD::SHL) + return true; + + // Turn off commute-with-shift transform after legalization, so it doesn't + // conflict with PerformSHLSimplify. (We could try to detect when + // PerformSHLSimplify would trigger more precisely, but it isn't + // really necessary.) 
+ return false; +} + static SDValue PerformSHLSimplify(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST) { @@ -10506,9 +10525,7 @@ static SDValue PerformSHLSimplify(SDNode *N, LLVM_DEBUG(dbgs() << "Simplify shl use:\n"; SHL.getOperand(0).dump(); SHL.dump(); N->dump()); LLVM_DEBUG(dbgs() << "Into:\n"; X.dump(); BinOp.dump(); Res.dump()); - - DAG.ReplaceAllUsesWith(SDValue(N, 0), Res); - return SDValue(N, 0); + return Res; } diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h index 50b4c2977fb..7e7016d1639 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h @@ -583,6 +583,9 @@ class VectorType; unsigned getABIAlignmentForCallingConv(Type *ArgTy, DataLayout DL) const override; + bool isDesirableToCommuteWithShift(const SDNode *N, + CombineLevel Level) const override; + protected: std::pair findRepresentativeClass(const TargetRegisterInfo *TRI, From 4def346d56854a122557c6ffad5649bac35158a4 Mon Sep 17 00:00:00 2001 From: Warner Losh Date: Tue, 12 Feb 2019 19:05:09 +0000 Subject: [PATCH 51/93] Revert r343077 until the license issues surrounding it can be resolved. Approved by: core@ --- usr.sbin/bhyve/uart_emul.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/usr.sbin/bhyve/uart_emul.c b/usr.sbin/bhyve/uart_emul.c index c9d136930eb..3c45ac7fc95 100644 --- a/usr.sbin/bhyve/uart_emul.c +++ b/usr.sbin/bhyve/uart_emul.c @@ -431,13 +431,6 @@ uart_write(struct uart_softc *sc, int offset, uint8_t value) sc->thre_int_pending = true; break; case REG_IER: - /* Assert an interrupt if re-enabling the THRE intr, since we - * always report THRE as active in the status register. 
- */ - if ((sc->ier & IER_ETXRDY) == 0 && - (value & IER_ETXRDY) != 0) { - sc->thre_int_pending = true; - } /* * Apply mask so that bits 4-7 are 0 * Also enables bits 0-3 only if they're 1 From 50619ae7d5f67fa290fec0efadada4f847d4a0f4 Mon Sep 17 00:00:00 2001 From: Poul-Henning Kamp Date: Tue, 12 Feb 2019 21:06:07 +0000 Subject: [PATCH 52/93] Point people to SMP(4) for CPU<->domain mapping. --- share/man/man4/numa.4 | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/share/man/man4/numa.4 b/share/man/man4/numa.4 index d8fe47efb5d..069eac68d95 100644 --- a/share/man/man4/numa.4 +++ b/share/man/man4/numa.4 @@ -77,6 +77,9 @@ The .Xr cpuset 1 tool is available for starting processes with a non-default policy, or to change the policy of an existing thread or process. +See +.Xr SMP 4 +for information about CPU to domain mapping. .Pp Systems with non-uniform access to I/O devices may mark those devices with the local VM domain identifier. @@ -117,6 +120,7 @@ Policy information is available in both struct thread and struct proc. .Xr cpuset 1 , .Xr cpuset_getaffinity 2 , .Xr cpuset_setaffinity 2 , +.Xr SMP 4 , .Xr bus_get_domain 9 .Sh HISTORY .Nm From 95dcf343b79aae21ae9a8cb0a452b4f1fc6e7328 Mon Sep 17 00:00:00 2001 From: Marius Strobl Date: Tue, 12 Feb 2019 21:08:44 +0000 Subject: [PATCH 53/93] Further correct and optimize the bus_dma(9) usage of iflib(4): o Correct the obvious bugs in the netmap(4) parts: - No longer check for the existence of DMA maps as bus_dma(9) is used unconditionally in iflib(4) since r341095. - Supply the correct DMA tag and map pairs to bus_dma(9) functions (see also the commit message of r343753). - In iflib_netmap_timer_adjust(), add synchronization of the TX descriptors before calling the ift_txd_credits_update method as the latter evaluates the TX descriptors possibly updated by the MAC. - In _task_fn_tx(), wrap the netmap(4)-specific bits in #ifdef DEV_NETMAP just as done in _task_fn_admin() and _task_fn_rx() respectively. 
o In iflib_fast_intr_rxtx(), synchronize the TX rather than the RX descriptors before calling the ift_txd_credits_update method (see also above). o There's no need to synchronize an RX buffer that is going to be recycled in iflib_rxd_pkt_get(), yet; it's sufficient to do that as late as passing RX buffers to the MAC via the ift_rxd_refill method. Hence, combine that synchronization with the synchronization of new buffers into a common spot in _iflib_fl_refill(). o There's no need to synchronize the RX descriptors of a free list in preparation of the MAC updating their statuses with every invocation of rxd_frag_to_sd(); it's enough to do this once before handing control over to the MAC, i. e. before calling ift_rxd_flush method in _iflib_fl_refill(), which already performs the necessary synchronization. o Given that the ift_rxd_available method evaluates the RX descriptors which possibly have been altered by the MAC, synchronize as appropriate beforehand. Most notably this is now done in iflib_rxd_avail(), which in turn means that we don't need to issue the same synchronization yet again before calling the ift_rxd_pkt_get method in iflib_rxeof(). o In iflib_txd_db_check(), synchronize the TX descriptors before handing them over to the MAC for transmission via the ift_txd_flush method. o In iflib_encap(), move the TX buffer synchronization after the invocation of the ift_txd_encap() method. If the MAC driver fails to encapsulate the packet and we retry with a defragmented mbuf chain or finally fail, the cycles for TX buffer synchronization have been wasted. Synchronizing afterwards matches what non-iflib(4) drivers typically do and is sufficient as the MAC will not actually start with the transmission before - in this case - the ift_txd_flush method is called. 
Moreover, for the latter reason the synchronization of the TX descriptors in iflib_encap() can go as it's enough to synchronize them before passing control over to the MAC by issuing the ift_txd_flush() method (see above). o In iflib_txq_can_drain(), only synchronize TX descriptors if the ift_txd_credits_update method accessing these is actually called. Differential Revision: https://reviews.freebsd.org/D19081 --- sys/net/iflib.c | 140 ++++++++++++++++++++++++------------------------ 1 file changed, 71 insertions(+), 69 deletions(-) diff --git a/sys/net/iflib.c b/sys/net/iflib.c index 8fa6c9c440d..c7e853bd390 100644 --- a/sys/net/iflib.c +++ b/sys/net/iflib.c @@ -845,11 +845,13 @@ netmap_fl_refill(iflib_rxq_t rxq, struct netmap_kring *kring, uint32_t nm_i, boo return netmap_ring_reinit(kring); fl->ifl_vm_addrs[tmp_pidx] = addr; - if (__predict_false(init) && map) { - netmap_load_map(na, fl->ifl_ifdi->idi_tag, map[nic_i], addr); - } else if (map && (slot->flags & NS_BUF_CHANGED)) { + if (__predict_false(init)) { + netmap_load_map(na, fl->ifl_buf_tag, + map[nic_i], addr); + } else if (slot->flags & NS_BUF_CHANGED) { /* buffer has changed, reload map */ - netmap_reload_map(na, fl->ifl_ifdi->idi_tag, map[nic_i], addr); + netmap_reload_map(na, fl->ifl_buf_tag, + map[nic_i], addr); } slot->flags &= ~NS_BUF_CHANGED; @@ -861,13 +863,9 @@ netmap_fl_refill(iflib_rxq_t rxq, struct netmap_kring *kring, uint32_t nm_i, boo iru.iru_pidx = refill_pidx; iru.iru_count = tmp_pidx+1; ctx->isc_rxd_refill(ctx->ifc_softc, &iru); - refill_pidx = nic_i; - if (map == NULL) - continue; - for (int n = 0; n < iru.iru_count; n++) { - bus_dmamap_sync(fl->ifl_ifdi->idi_tag, map[nic_i_dma], + bus_dmamap_sync(fl->ifl_buf_tag, map[nic_i_dma], BUS_DMASYNC_PREREAD); /* XXX - change this to not use the netmap func*/ nic_i_dma = nm_next(nic_i_dma, lim); @@ -876,9 +874,8 @@ netmap_fl_refill(iflib_rxq_t rxq, struct netmap_kring *kring, uint32_t nm_i, boo } kring->nr_hwcur = head; - if (map) - 
bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); + bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, + BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); if (__predict_true(nic_i != UINT_MAX)) { ctx->isc_rxd_flush(ctx->ifc_softc, rxq->ifr_id, fl->ifl_id, nic_i); DBG_COUNTER_INC(rxd_flush); @@ -922,7 +919,7 @@ iflib_netmap_txsync(struct netmap_kring *kring, int flags) if_ctx_t ctx = ifp->if_softc; iflib_txq_t txq = &ctx->ifc_txqs[kring->ring_id]; - bus_dmamap_sync(txq->ift_buf_tag, txq->ift_ifdi->idi_map, + bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); /* @@ -956,8 +953,7 @@ iflib_netmap_txsync(struct netmap_kring *kring, int flags) __builtin_prefetch(&ring->slot[nm_i]); __builtin_prefetch(&txq->ift_sds.ifsd_m[nic_i]); - if (txq->ift_sds.ifsd_map) - __builtin_prefetch(&txq->ift_sds.ifsd_map[nic_i]); + __builtin_prefetch(&txq->ift_sds.ifsd_map[nic_i]); for (n = 0; nm_i != head; n++) { struct netmap_slot *slot = &ring->slot[nm_i]; @@ -984,20 +980,20 @@ iflib_netmap_txsync(struct netmap_kring *kring, int flags) /* prefetch for next round */ __builtin_prefetch(&ring->slot[nm_i + 1]); __builtin_prefetch(&txq->ift_sds.ifsd_m[nic_i + 1]); - if (txq->ift_sds.ifsd_map) { - __builtin_prefetch(&txq->ift_sds.ifsd_map[nic_i + 1]); + __builtin_prefetch(&txq->ift_sds.ifsd_map[nic_i + 1]); - NM_CHECK_ADDR_LEN(na, addr, len); + NM_CHECK_ADDR_LEN(na, addr, len); - if (slot->flags & NS_BUF_CHANGED) { - /* buffer has changed, reload map */ - netmap_reload_map(na, txq->ift_buf_tag, - txq->ift_sds.ifsd_map[nic_i], addr); - } - /* make sure changes to the buffer are synced */ - bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_sds.ifsd_map[nic_i], - BUS_DMASYNC_PREWRITE); + if (slot->flags & NS_BUF_CHANGED) { + /* buffer has changed, reload map */ + netmap_reload_map(na, txq->ift_buf_tag, + txq->ift_sds.ifsd_map[nic_i], addr); } + /* make sure changes to the 
buffer are synced */ + bus_dmamap_sync(txq->ift_buf_tag, + txq->ift_sds.ifsd_map[nic_i], + BUS_DMASYNC_PREWRITE); + slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED); nm_i = nm_next(nm_i, lim); nic_i = nm_next(nic_i, lim); @@ -1005,7 +1001,7 @@ iflib_netmap_txsync(struct netmap_kring *kring, int flags) kring->nr_hwcur = nm_i; /* synchronize the NIC ring */ - bus_dmamap_sync(txq->ift_buf_tag, txq->ift_ifdi->idi_map, + bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* (re)start the tx unit up to slot nic_i (excluded) */ @@ -1053,6 +1049,7 @@ iflib_netmap_rxsync(struct netmap_kring *kring, int flags) { struct netmap_adapter *na = kring->na; struct netmap_ring *ring = kring->ring; + iflib_fl_t fl; uint32_t nm_i; /* index into the netmap ring */ uint32_t nic_i; /* index into the NIC ring */ u_int i, n; @@ -1064,18 +1061,18 @@ iflib_netmap_rxsync(struct netmap_kring *kring, int flags) struct ifnet *ifp = na->ifp; if_ctx_t ctx = ifp->if_softc; iflib_rxq_t rxq = &ctx->ifc_rxqs[kring->ring_id]; - iflib_fl_t fl = rxq->ifr_fl; if (head > lim) return netmap_ring_reinit(kring); - /* XXX check sync modes */ + /* + * XXX netmap_fl_refill() only ever (re)fills free list 0 so far. + */ + for (i = 0, fl = rxq->ifr_fl; i < rxq->ifr_nfl; i++, fl++) { - if (fl->ifl_sds.ifsd_map == NULL) - continue; - bus_dmamap_sync(rxq->ifr_fl[i].ifl_buf_tag, - fl->ifl_ifdi->idi_map, + bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); } + /* * First part: import newly received packets. 
* @@ -1099,7 +1096,8 @@ iflib_netmap_rxsync(struct netmap_kring *kring, int flags) fl = &rxq->ifr_fl[i]; nic_i = fl->ifl_cidx; nm_i = netmap_idx_n2k(kring, nic_i); - avail = iflib_rxd_avail(ctx, rxq, nic_i, USHRT_MAX); + avail = ctx->isc_rxd_available(ctx->ifc_softc, + rxq->ifr_id, nic_i, USHRT_MAX); for (n = 0; avail > 0; n++, avail--) { rxd_info_zero(&ri); ri.iri_frags = rxq->ifr_frags; @@ -1110,7 +1108,7 @@ iflib_netmap_rxsync(struct netmap_kring *kring, int flags) error = ctx->isc_rxd_pkt_get(ctx->ifc_softc, &ri); ring->slot[nm_i].len = error ? 0 : ri.iri_len - crclen; ring->slot[nm_i].flags = 0; - bus_dmamap_sync(fl->ifl_ifdi->idi_tag, + bus_dmamap_sync(fl->ifl_buf_tag, fl->ifl_sds.ifsd_map[nic_i], BUS_DMASYNC_POSTREAD); nm_i = nm_next(nm_i, lim); nic_i = nm_next(nic_i, lim); @@ -1221,13 +1219,17 @@ iflib_netmap_rxq_init(if_ctx_t ctx, iflib_rxq_t rxq) } static void -iflib_netmap_timer_adjust(if_ctx_t ctx, uint16_t txqid, uint32_t *reset_on) +iflib_netmap_timer_adjust(if_ctx_t ctx, iflib_txq_t txq, uint32_t *reset_on) { struct netmap_kring *kring; + uint16_t txqid; + txqid = txq->ift_id; kring = NA(ctx->ifc_ifp)->tx_rings[txqid]; if (kring->nr_hwcur != nm_next(kring->nr_hwtail, kring->nkr_num_slots - 1)) { + bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, + BUS_DMASYNC_POSTREAD); if (ctx->isc_txd_credits_update(ctx->ifc_softc, txqid, false)) netmap_tx_irq(ctx->ifc_ifp, txqid); if (!(ctx->ifc_flags & IFC_NETMAP_TX_IRQ)) { @@ -1249,7 +1251,7 @@ iflib_netmap_timer_adjust(if_ctx_t ctx, uint16_t txqid, uint32_t *reset_on) #define iflib_netmap_attach(ctx) (0) #define netmap_rx_irq(ifp, qid, budget) (0) #define netmap_tx_irq(ifp, qid) do {} while (0) -#define iflib_netmap_timer_adjust(ctx, txqid, reset_on) +#define iflib_netmap_timer_adjust(ctx, txq, reset_on) #endif @@ -1482,9 +1484,12 @@ iflib_fast_intr_rxtx(void *arg) { iflib_filter_info_t info = arg; struct grouptask *gtask = info->ifi_task; + if_ctx_t ctx; iflib_rxq_t rxq = 
(iflib_rxq_t)info->ifi_ctx; - if_ctx_t ctx = NULL;; + iflib_txq_t txq; + void *sc; int i, cidx; + qidx_t txqid; if (!iflib_started) return (FILTER_HANDLED); @@ -1493,19 +1498,19 @@ iflib_fast_intr_rxtx(void *arg) if (info->ifi_filter != NULL && info->ifi_filter(info->ifi_filter_arg) == FILTER_HANDLED) return (FILTER_HANDLED); + ctx = rxq->ifr_ctx; + sc = ctx->ifc_softc; MPASS(rxq->ifr_ntxqirq); for (i = 0; i < rxq->ifr_ntxqirq; i++) { - qidx_t txqid = rxq->ifr_txqid[i]; - - ctx = rxq->ifr_ctx; - - bus_dmamap_sync(rxq->ifr_ifdi->idi_tag, rxq->ifr_ifdi->idi_map, + txqid = rxq->ifr_txqid[i]; + txq = &ctx->ifc_txqs[txqid]; + bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, BUS_DMASYNC_POSTREAD); - if (!ctx->isc_txd_credits_update(ctx->ifc_softc, txqid, false)) { + if (!ctx->isc_txd_credits_update(sc, txqid, false)) { IFDI_TX_QUEUE_INTR_ENABLE(ctx, txqid); continue; } - GROUPTASK_ENQUEUE(&ctx->ifc_txqs[txqid].ift_task); + GROUPTASK_ENQUEUE(&txq->ift_task); } if (ctx->ifc_sctx->isc_flags & IFLIB_HAS_RXCQ) cidx = rxq->ifr_cq_cidx; @@ -1804,7 +1809,7 @@ iflib_txq_setup(iflib_txq_t txq) IFDI_TXQ_SETUP(ctx, txq->ift_id); for (i = 0, di = txq->ift_ifdi; i < sctx->isc_ntxqs; i++, di++) bus_dmamap_sync(di->idi_tag, di->idi_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); + BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); return (0); } @@ -2008,8 +2013,6 @@ _iflib_fl_refill(if_ctx_t ctx, iflib_fl_t fl, int count) break; } - bus_dmamap_sync(fl->ifl_buf_tag, sd_map[frag_idx], - BUS_DMASYNC_PREREAD); sd_ba[frag_idx] = bus_addr = cb_arg.seg.ds_addr; sd_cl[frag_idx] = cl; #if MEMORY_LOGGING @@ -2018,6 +2021,8 @@ _iflib_fl_refill(if_ctx_t ctx, iflib_fl_t fl, int count) } else { bus_addr = sd_ba[frag_idx]; } + bus_dmamap_sync(fl->ifl_buf_tag, sd_map[frag_idx], + BUS_DMASYNC_PREREAD); MPASS(sd_m[frag_idx] == NULL); if ((m = m_gethdr(M_NOWAIT, MT_NOINIT)) == NULL) { @@ -2285,7 +2290,7 @@ iflib_timer(void *arg) } #ifdef DEV_NETMAP if (if_getcapenable(ctx->ifc_ifp) & 
IFCAP_NETMAP) - iflib_netmap_timer_adjust(ctx, txq->ift_id, &reset_on); + iflib_netmap_timer_adjust(ctx, txq, &reset_on); #endif /* handle any laggards */ if (txq->ift_db_pending) @@ -2494,7 +2499,6 @@ rxd_frag_to_sd(iflib_rxq_t rxq, if_rxd_frag_t irf, int unload, if_rxsd_t sd) int flid, cidx; bus_dmamap_t map; iflib_fl_t fl; - iflib_dma_info_t di; int next; map = NULL; @@ -2514,7 +2518,6 @@ rxd_frag_to_sd(iflib_rxq_t rxq, if_rxd_frag_t irf, int unload, if_rxsd_t sd) next = (cidx + CACHE_PTR_INCREMENT) & (fl->ifl_size-1); prefetch(&fl->ifl_sds.ifsd_map[next]); map = fl->ifl_sds.ifsd_map[cidx]; - di = fl->ifl_ifdi; next = (cidx + CACHE_LINE_SIZE) & (fl->ifl_size-1); /* not valid assert if bxe really does SGE from non-contiguous elements */ @@ -2525,8 +2528,6 @@ rxd_frag_to_sd(iflib_rxq_t rxq, if_rxd_frag_t irf, int unload, if_rxsd_t sd) fl->ifl_cidx = (fl->ifl_cidx + 1) & (fl->ifl_size-1); if (__predict_false(fl->ifl_cidx == 0)) fl->ifl_gen = 0; - bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); bit_clear(fl->ifl_rx_bitmap, cidx); } @@ -2604,9 +2605,6 @@ iflib_rxd_pkt_get(iflib_rxq_t rxq, if_rxd_info_t ri) m->m_data += 2; #endif memcpy(m->m_data, *sd.ifsd_cl, ri->iri_len); - bus_dmamap_sync(rxq->ifr_fl->ifl_buf_tag, - rxq->ifr_fl->ifl_sds.ifsd_map[ri->iri_frags[0].irf_idx], - BUS_DMASYNC_PREREAD); m->m_len = ri->iri_frags[0].irf_len; } else { m = assemble_segments(rxq, ri, &sd); @@ -2675,7 +2673,6 @@ iflib_rxeof(iflib_rxq_t rxq, qidx_t budget) if_ctx_t ctx = rxq->ifr_ctx; if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; - iflib_dma_info_t di; int avail, i; qidx_t *cidxp; struct if_rxd_info ri; @@ -2720,9 +2717,6 @@ iflib_rxeof(iflib_rxq_t rxq, qidx_t budget) ri.iri_cidx = *cidxp; ri.iri_ifp = ifp; ri.iri_frags = rxq->ifr_frags; - di = rxq->ifr_fl[rxq->ifr_frags[0].irf_flid].ifl_ifdi; - bus_dmamap_sync(di->idi_tag, di->idi_map, - BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); 
err = ctx->isc_rxd_pkt_get(ctx->ifc_softc, &ri); if (err) @@ -2891,6 +2885,8 @@ iflib_txd_db_check(if_ctx_t ctx, iflib_txq_t txq, int ring, qidx_t in_use) max = TXQ_MAX_DB_DEFERRED(txq, in_use); if (ring || txq->ift_db_pending >= max) { dbval = txq->ift_npending ? txq->ift_npending : txq->ift_pidx; + bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, + BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); ctx->isc_txd_flush(ctx->ifc_softc, txq->ift_id, dbval); txq->ift_db_pending = txq->ift_npending = 0; rang = true; @@ -3333,10 +3329,8 @@ defrag: #ifdef PKT_DEBUG print_pkt(&pi); #endif - bus_dmamap_sync(buf_tag, map, BUS_DMASYNC_PREWRITE); if ((err = ctx->isc_txd_encap(ctx->ifc_softc, &pi)) == 0) { - bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); + bus_dmamap_sync(buf_tag, map, BUS_DMASYNC_PREWRITE); DBG_COUNTER_INC(tx_encap); MPASS(pi.ipi_new_pidx < txq->ift_size); @@ -3505,10 +3499,12 @@ iflib_txq_can_drain(struct ifmp_ring *r) iflib_txq_t txq = r->cookie; if_ctx_t ctx = txq->ift_ctx; + if (TXQ_AVAIL(txq) > MAX_TX_DESC(ctx) + 2) + return (1); bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, BUS_DMASYNC_POSTREAD); - return ((TXQ_AVAIL(txq) > MAX_TX_DESC(ctx) + 2) || - ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, false)); + return (ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, + false)); } static uint32_t @@ -3662,7 +3658,6 @@ _task_fn_tx(void *context) { iflib_txq_t txq = context; if_ctx_t ctx = txq->ift_ctx; - struct ifnet *ifp = ctx->ifc_ifp; int abdicate = ctx->ifc_sysctl_tx_abdicate; #ifdef IFLIB_DIAGNOSTICS @@ -3670,14 +3665,16 @@ _task_fn_tx(void *context) #endif if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)) return; - if (if_getcapenable(ifp) & IFCAP_NETMAP) { +#ifdef DEV_NETMAP + if (if_getcapenable(ctx->ifc_ifp) & IFCAP_NETMAP) { bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, BUS_DMASYNC_POSTREAD); if 
(ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, false)) - netmap_tx_irq(ifp, txq->ift_id); + netmap_tx_irq(ctx->ifc_ifp, txq->ift_id); IFDI_TX_QUEUE_INTR_ENABLE(ctx, txq->ift_id); return; } +#endif #ifdef ALTQ if (ALTQ_IS_ENABLED(&ifp->if_snd)) iflib_altq_if_start(ifp); @@ -3785,7 +3782,7 @@ _task_fn_admin(void *context) #ifdef DEV_NETMAP reset_on = hz / 2; if (if_getcapenable(ctx->ifc_ifp) & IFCAP_NETMAP) - iflib_netmap_timer_adjust(ctx, txq->ift_id, &reset_on); + iflib_netmap_timer_adjust(ctx, txq, &reset_on); #endif callout_reset_on(&txq->ift_timer, reset_on, iflib_timer, txq, txq->ift_timer.c_cpu); } @@ -5953,7 +5950,12 @@ iflib_tx_credits_update(if_ctx_t ctx, iflib_txq_t txq) static int iflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, qidx_t cidx, qidx_t budget) { + iflib_fl_t fl; + u_int i; + for (i = 0, fl = &rxq->ifr_fl[0]; i < rxq->ifr_nfl; i++, fl++) + bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, + BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); return (ctx->isc_rxd_available(ctx->ifc_softc, rxq->ifr_id, cidx, budget)); } From 3838c6a3e6964d6c69f132c0ae12991857fa5200 Mon Sep 17 00:00:00 2001 From: Kristof Provost Date: Tue, 12 Feb 2019 21:22:57 +0000 Subject: [PATCH 54/93] garp: Fix vnet related panic for gratuitous arp Gratuitous ARP packets are sent from a timer, which means we don't have a vnet context set. As a result we panic trying to send the packet. Set the vnet context based on the interface associated with the interface address. 
To reproduce: sysctl net.link.ether.inet.garp_rexmit_count=2 ifconfig vtnet1 10.0.0.1/24 up PR: 235699 Reviewed by: vangyzen@ MFC after: 1 week --- sys/netinet/if_ether.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sys/netinet/if_ether.c b/sys/netinet/if_ether.c index f278fa1b3ca..c84fcee3328 100644 --- a/sys/netinet/if_ether.c +++ b/sys/netinet/if_ether.c @@ -1335,6 +1335,8 @@ garp_rexmit(void *arg) return; } + CURVNET_SET(ia->ia_ifa.ifa_ifp->if_vnet); + /* * Drop lock while the ARP request is generated. */ @@ -1362,6 +1364,8 @@ garp_rexmit(void *arg) ifa_free(&ia->ia_ifa); } } + + CURVNET_RESTORE(); } /* From f855ec814db219bedf0eda304989a971ed242386 Mon Sep 17 00:00:00 2001 From: Marius Strobl Date: Tue, 12 Feb 2019 21:23:59 +0000 Subject: [PATCH 55/93] Make taskqgroup_attach{,_cpu}(9) work across architectures So far, intr_{g,s}etaffinity(9) take a single int for identifying a device interrupt. This approach doesn't work on all architectures supported, as a single int isn't sufficient to globally specify a device interrupt. In particular, with multiple interrupt controllers in one system as found on e. g. arm and arm64 machines, an interrupt number as returned by rman_get_start(9) may be only unique relative to the bus and, thus, interrupt controller, a certain device hangs off from. In turn, this makes taskqgroup_attach{,_cpu}(9) and - internal to the gtaskqueue implementation - taskqgroup_attach_deferred{,_cpu}() not work across architectures. Yet in turn, iflib(4) as gtaskqueue consumer so far doesn't fit architectures where interrupt numbers aren't globally unique. However, at least for intr_setaffinity(..., CPU_WHICH_IRQ, ...) as employed by the gtaskqueue implementation to bind an interrupt to a particular CPU, using bus_bind_intr(9) instead is equivalent from a functional point of view, with bus_bind_intr(9) taking the device and interrupt resource arguments required for uniquely specifying a device interrupt. 
Thus, change the gtaskqueue implementation to employ bus_bind_intr(9) instead and intr_{g,s}etaffinity(9) to take the device and interrupt resource arguments required respectively. This change also moves struct grouptask from sys/_task.h to sys/gtaskqueue.h and wraps struct gtask along with the gtask_fn_t typedef into #ifdef _KERNEL as userland likes to include sys/_task.h or indirectly drags it in - for better or worse also with _KERNEL defined -, which with device_t and struct resource dependencies otherwise is no longer as easily possible now. The userland inclusion problem probably can be improved a bit by introducing a _WANT_TASK (as well as a _WANT_MOUNT) akin to the existing _WANT_PRISON etc., which is orthogonal to this change, though, and likely needs an exp-run. While at it: - Change the gt_cpu member in the grouptask structure to be of type int as used elsewhere for specifying CPUs (an int16_t may be too narrow sooner or later), - move the gtaskqueue_enqueue_fn typedef from sys/gtaskqueue.h to the gtaskqueue implementation as it's only used and needed there, - change the GTASK_INIT macro to use "gtask" rather than "task" as argument given that it actually operates on a struct gtask rather than a struct task, and - let subr_gtaskqueue.c consistently use __func__ to print function names. 
Reported by: mmel Reviewed by: mmel Differential Revision: https://reviews.freebsd.org/D19139 --- .../linuxkpi/common/src/linux_tasklet.c | 2 +- sys/kern/subr_epoch.c | 2 +- sys/kern/subr_gtaskqueue.c | 77 +++++++++---------- sys/net/iflib.c | 63 ++++++++------- sys/sys/_task.h | 20 ++--- sys/sys/gtaskqueue.h | 42 ++++++---- sys/sys/param.h | 2 +- 7 files changed, 112 insertions(+), 96 deletions(-) diff --git a/sys/compat/linuxkpi/common/src/linux_tasklet.c b/sys/compat/linuxkpi/common/src/linux_tasklet.c index 049d9caac69..4e4833c347e 100644 --- a/sys/compat/linuxkpi/common/src/linux_tasklet.c +++ b/sys/compat/linuxkpi/common/src/linux_tasklet.c @@ -109,7 +109,7 @@ tasklet_subsystem_init(void *arg __unused) GROUPTASK_INIT(&tw->gtask, 0, tasklet_handler, tw); snprintf(buf, sizeof(buf), "softirq%d", i); taskqgroup_attach_cpu(qgroup_softirq, &tw->gtask, - "tasklet", i, -1, buf); + "tasklet", i, NULL, NULL, buf); } } SYSINIT(linux_tasklet, SI_SUB_TASKQ, SI_ORDER_THIRD, tasklet_subsystem_init, NULL); diff --git a/sys/kern/subr_epoch.c b/sys/kern/subr_epoch.c index 9104f1e0880..8d76ab71cfc 100644 --- a/sys/kern/subr_epoch.c +++ b/sys/kern/subr_epoch.c @@ -147,7 +147,7 @@ epoch_init(void *arg __unused) GROUPTASK_INIT(DPCPU_ID_PTR(cpu, epoch_cb_task), 0, epoch_call_task, NULL); taskqgroup_attach_cpu(qgroup_softirq, - DPCPU_ID_PTR(cpu, epoch_cb_task), NULL, cpu, -1, + DPCPU_ID_PTR(cpu, epoch_cb_task), NULL, cpu, NULL, NULL, "epoch call task"); } inited = 1; diff --git a/sys/kern/subr_gtaskqueue.c b/sys/kern/subr_gtaskqueue.c index fb68c108214..8be973cddb2 100644 --- a/sys/kern/subr_gtaskqueue.c +++ b/sys/kern/subr_gtaskqueue.c @@ -33,7 +33,6 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include #include #include #include @@ -64,6 +63,8 @@ struct gtaskqueue_busy { static struct gtask * const TB_DRAIN_WAITER = (struct gtask *)0x1; +typedef void (*gtaskqueue_enqueue_fn)(void *context); + struct gtaskqueue { STAILQ_HEAD(, gtask) tq_queue; gtaskqueue_enqueue_fn 
tq_enqueue; @@ -681,7 +682,7 @@ taskqgroup_find(struct taskqgroup *qgroup, void *uniq) } } if (idx == -1) - panic("taskqgroup_find: Failed to pick a qid."); + panic("%s: failed to pick a qid.", __func__); return (idx); } @@ -713,13 +714,13 @@ SYSINIT(tqg_record_smp_started, SI_SUB_SMP, SI_ORDER_FOURTH, void taskqgroup_attach(struct taskqgroup *qgroup, struct grouptask *gtask, - void *uniq, int irq, const char *name) + void *uniq, device_t dev, struct resource *irq, const char *name) { - cpuset_t mask; - int qid, error; + int cpu, qid, error; gtask->gt_uniq = uniq; snprintf(gtask->gt_name, GROUPTASK_NAMELEN, "%s", name ? name : "grouptask"); + gtask->gt_dev = dev; gtask->gt_irq = irq; gtask->gt_cpu = -1; mtx_lock(&qgroup->tqg_lock); @@ -727,14 +728,14 @@ taskqgroup_attach(struct taskqgroup *qgroup, struct grouptask *gtask, qgroup->tqg_queue[qid].tgc_cnt++; LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list); gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq; - if (irq != -1 && tqg_smp_started) { - gtask->gt_cpu = qgroup->tqg_queue[qid].tgc_cpu; - CPU_ZERO(&mask); - CPU_SET(qgroup->tqg_queue[qid].tgc_cpu, &mask); + if (dev != NULL && irq != NULL && tqg_smp_started) { + cpu = qgroup->tqg_queue[qid].tgc_cpu; + gtask->gt_cpu = cpu; mtx_unlock(&qgroup->tqg_lock); - error = intr_setaffinity(irq, CPU_WHICH_IRQ, &mask); + error = bus_bind_intr(dev, irq, cpu); if (error) - printf("%s: setaffinity failed for %s: %d\n", __func__, gtask->gt_name, error); + printf("%s: binding interrupt failed for %s: %d\n", + __func__, gtask->gt_name, error); } else mtx_unlock(&qgroup->tqg_lock); } @@ -742,27 +743,22 @@ taskqgroup_attach(struct taskqgroup *qgroup, struct grouptask *gtask, static void taskqgroup_attach_deferred(struct taskqgroup *qgroup, struct grouptask *gtask) { - cpuset_t mask; int qid, cpu, error; mtx_lock(&qgroup->tqg_lock); qid = taskqgroup_find(qgroup, gtask->gt_uniq); cpu = qgroup->tqg_queue[qid].tgc_cpu; - if (gtask->gt_irq != -1) { + if 
(gtask->gt_dev != NULL && gtask->gt_irq != NULL) { mtx_unlock(&qgroup->tqg_lock); - - CPU_ZERO(&mask); - CPU_SET(cpu, &mask); - error = intr_setaffinity(gtask->gt_irq, CPU_WHICH_IRQ, &mask); + error = bus_bind_intr(gtask->gt_dev, gtask->gt_irq, cpu); mtx_lock(&qgroup->tqg_lock); if (error) - printf("%s: %s setaffinity failed: %d\n", __func__, gtask->gt_name, error); + printf("%s: binding interrupt failed for %s: %d\n", + __func__, gtask->gt_name, error); } qgroup->tqg_queue[qid].tgc_cnt++; - - LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, - gt_list); + LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list); MPASS(qgroup->tqg_queue[qid].tgc_taskq != NULL); gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq; mtx_unlock(&qgroup->tqg_lock); @@ -770,14 +766,14 @@ taskqgroup_attach_deferred(struct taskqgroup *qgroup, struct grouptask *gtask) int taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *gtask, - void *uniq, int cpu, int irq, const char *name) + void *uniq, int cpu, device_t dev, struct resource *irq, const char *name) { - cpuset_t mask; int i, qid, error; qid = -1; gtask->gt_uniq = uniq; snprintf(gtask->gt_name, GROUPTASK_NAMELEN, "%s", name ? 
name : "grouptask"); + gtask->gt_dev = dev; gtask->gt_irq = irq; gtask->gt_cpu = cpu; mtx_lock(&qgroup->tqg_lock); @@ -800,12 +796,11 @@ taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *gtask, cpu = qgroup->tqg_queue[qid].tgc_cpu; mtx_unlock(&qgroup->tqg_lock); - CPU_ZERO(&mask); - CPU_SET(cpu, &mask); - if (irq != -1 && tqg_smp_started) { - error = intr_setaffinity(irq, CPU_WHICH_IRQ, &mask); + if (dev != NULL && irq != NULL && tqg_smp_started) { + error = bus_bind_intr(dev, irq, cpu); if (error) - printf("%s: setaffinity failed: %d\n", __func__, error); + printf("%s: binding interrupt failed for %s: %d\n", + __func__, gtask->gt_name, error); } return (0); } @@ -813,10 +808,12 @@ taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *gtask, static int taskqgroup_attach_cpu_deferred(struct taskqgroup *qgroup, struct grouptask *gtask) { - cpuset_t mask; - int i, qid, irq, cpu, error; + device_t dev; + struct resource *irq; + int cpu, error, i, qid; qid = -1; + dev = gtask->gt_dev; irq = gtask->gt_irq; cpu = gtask->gt_cpu; MPASS(tqg_smp_started); @@ -837,13 +834,11 @@ taskqgroup_attach_cpu_deferred(struct taskqgroup *qgroup, struct grouptask *gtas gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq; mtx_unlock(&qgroup->tqg_lock); - CPU_ZERO(&mask); - CPU_SET(cpu, &mask); - - if (irq != -1) { - error = intr_setaffinity(irq, CPU_WHICH_IRQ, &mask); + if (dev != NULL && irq != NULL) { + error = bus_bind_intr(dev, irq, cpu); if (error) - printf("%s: setaffinity failed: %d\n", __func__, error); + printf("%s: binding interrupt failed for %s: %d\n", + __func__, gtask->gt_name, error); } return (0); } @@ -859,7 +854,7 @@ taskqgroup_detach(struct taskqgroup *qgroup, struct grouptask *gtask) if (qgroup->tqg_queue[i].tgc_taskq == gtask->gt_taskqueue) break; if (i == qgroup->tqg_cnt) - panic("taskqgroup_detach: task %s not in group\n", gtask->gt_name); + panic("%s: task %s not in group", __func__, gtask->gt_name); qgroup->tqg_queue[i].tgc_cnt--; 
LIST_REMOVE(gtask, gt_list); mtx_unlock(&qgroup->tqg_lock); @@ -882,8 +877,7 @@ taskqgroup_binder(void *ctx) thread_unlock(curthread); if (error) - printf("%s: setaffinity failed: %d\n", __func__, - error); + printf("%s: binding curthread failed: %d\n", __func__, error); free(gtask, M_DEVBUF); } @@ -1051,15 +1045,16 @@ taskqgroup_destroy(struct taskqgroup *qgroup) void taskqgroup_config_gtask_init(void *ctx, struct grouptask *gtask, gtask_fn_t *fn, - const char *name) + const char *name) { GROUPTASK_INIT(gtask, 0, fn, ctx); - taskqgroup_attach(qgroup_config, gtask, gtask, -1, name); + taskqgroup_attach(qgroup_config, gtask, gtask, NULL, NULL, name); } void taskqgroup_config_gtask_deinit(struct grouptask *gtask) { + taskqgroup_detach(qgroup_config, gtask); } diff --git a/sys/net/iflib.c b/sys/net/iflib.c index c7e853bd390..1f922b38281 100644 --- a/sys/net/iflib.c +++ b/sys/net/iflib.c @@ -4481,7 +4481,8 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct GROUPTASK_INIT(&ctx->ifc_admin_task, 0, _task_fn_admin, ctx); /* XXX format name */ - taskqgroup_attach(qgroup_if_config_tqg, &ctx->ifc_admin_task, ctx, -1, "admin"); + taskqgroup_attach(qgroup_if_config_tqg, &ctx->ifc_admin_task, ctx, + NULL, NULL, "admin"); /* Set up cpu set. If it fails, use the set of all CPUs. 
*/ if (bus_get_cpus(dev, INTR_CPUS, sizeof(ctx->ifc_cpus), &ctx->ifc_cpus) != 0) { @@ -4742,7 +4743,8 @@ iflib_pseudo_register(device_t dev, if_shared_ctx_t sctx, if_ctx_t *ctxp, GROUPTASK_INIT(&ctx->ifc_admin_task, 0, _task_fn_admin, ctx); /* XXX format name */ - taskqgroup_attach(qgroup_if_config_tqg, &ctx->ifc_admin_task, ctx, -1, "admin"); + taskqgroup_attach(qgroup_if_config_tqg, &ctx->ifc_admin_task, ctx, + NULL, NULL, "admin"); /* XXX --- can support > 1 -- but keep it simple for now */ scctx->isc_intr = IFLIB_INTR_LEGACY; @@ -5634,19 +5636,22 @@ get_core_offset(if_ctx_t ctx, iflib_intr_type_t type, int qid) /* Just to avoid copy/paste */ static inline int -iflib_irq_set_affinity(if_ctx_t ctx, int irq, iflib_intr_type_t type, int qid, - struct grouptask *gtask, struct taskqgroup *tqg, void *uniq, const char *name) +iflib_irq_set_affinity(if_ctx_t ctx, if_irq_t irq, iflib_intr_type_t type, + int qid, struct grouptask *gtask, struct taskqgroup *tqg, void *uniq, + const char *name) { - int cpuid; - int err, tid; + device_t dev; + int err, cpuid, tid; + dev = ctx->ifc_dev; cpuid = find_nth(ctx, qid); tid = get_core_offset(ctx, type, qid); MPASS(tid >= 0); cpuid = find_close_core(cpuid, tid); - err = taskqgroup_attach_cpu(tqg, gtask, uniq, cpuid, irq, name); + err = taskqgroup_attach_cpu(tqg, gtask, uniq, cpuid, dev, irq->ii_res, + name); if (err) { - device_printf(ctx->ifc_dev, "taskqgroup_attach_cpu failed %d\n", err); + device_printf(dev, "taskqgroup_attach_cpu failed %d\n", err); return (err); } #ifdef notyet @@ -5661,6 +5666,7 @@ iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid, iflib_intr_type_t type, driver_filter_t *filter, void *filter_arg, int qid, const char *name) { + device_t dev; struct grouptask *gtask; struct taskqgroup *tqg; iflib_filter_info_t info; @@ -5720,20 +5726,22 @@ iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid, info->ifi_task = gtask; info->ifi_ctx = q; + dev = ctx->ifc_dev; err = _iflib_irq_alloc(ctx, irq, rid, 
intr_fast, NULL, info, name); if (err != 0) { - device_printf(ctx->ifc_dev, "_iflib_irq_alloc failed %d\n", err); + device_printf(dev, "_iflib_irq_alloc failed %d\n", err); return (err); } if (type == IFLIB_INTR_ADMIN) return (0); if (tqrid != -1) { - err = iflib_irq_set_affinity(ctx, rman_get_start(irq->ii_res), type, qid, gtask, tqg, q, name); + err = iflib_irq_set_affinity(ctx, irq, type, qid, gtask, tqg, + q, name); if (err) return (err); } else { - taskqgroup_attach(tqg, gtask, q, rman_get_start(irq->ii_res), name); + taskqgroup_attach(tqg, gtask, q, dev, irq->ii_res, name); } return (0); @@ -5746,7 +5754,6 @@ iflib_softirq_alloc_generic(if_ctx_t ctx, if_irq_t irq, iflib_intr_type_t type, struct taskqgroup *tqg; gtask_fn_t *fn; void *q; - int irq_num = -1; int err; switch (type) { @@ -5755,16 +5762,12 @@ iflib_softirq_alloc_generic(if_ctx_t ctx, if_irq_t irq, iflib_intr_type_t type, gtask = &ctx->ifc_txqs[qid].ift_task; tqg = qgroup_if_io_tqg; fn = _task_fn_tx; - if (irq != NULL) - irq_num = rman_get_start(irq->ii_res); break; case IFLIB_INTR_RX: q = &ctx->ifc_rxqs[qid]; gtask = &ctx->ifc_rxqs[qid].ifr_task; tqg = qgroup_if_io_tqg; fn = _task_fn_rx; - if (irq != NULL) - irq_num = rman_get_start(irq->ii_res); break; case IFLIB_INTR_IOV: q = ctx; @@ -5776,13 +5779,14 @@ iflib_softirq_alloc_generic(if_ctx_t ctx, if_irq_t irq, iflib_intr_type_t type, panic("unknown net intr type"); } GROUPTASK_INIT(gtask, 0, fn, q); - if (irq_num != -1) { - err = iflib_irq_set_affinity(ctx, irq_num, type, qid, gtask, tqg, q, name); + if (irq != NULL) { + err = iflib_irq_set_affinity(ctx, irq, type, qid, gtask, tqg, + q, name); if (err) - taskqgroup_attach(tqg, gtask, q, irq_num, name); - } - else { - taskqgroup_attach(tqg, gtask, q, irq_num, name); + taskqgroup_attach(tqg, gtask, q, ctx->ifc_dev, + irq->ii_res, name); + } else { + taskqgroup_attach(tqg, gtask, q, NULL, NULL, name); } } @@ -5805,7 +5809,9 @@ iflib_legacy_setup(if_ctx_t ctx, driver_filter_t filter, void *filter_arg, 
int * iflib_rxq_t rxq = ctx->ifc_rxqs; if_irq_t irq = &ctx->ifc_legacy_irq; iflib_filter_info_t info; + device_t dev; struct grouptask *gtask; + struct resource *res; struct taskqgroup *tqg; gtask_fn_t *fn; int tqrid; @@ -5825,14 +5831,17 @@ iflib_legacy_setup(if_ctx_t ctx, driver_filter_t filter, void *filter_arg, int * info->ifi_task = gtask; info->ifi_ctx = ctx; + dev = ctx->ifc_dev; /* We allocate a single interrupt resource */ if ((err = _iflib_irq_alloc(ctx, irq, tqrid, iflib_fast_intr_ctx, NULL, info, name)) != 0) return (err); GROUPTASK_INIT(gtask, 0, fn, q); - taskqgroup_attach(tqg, gtask, q, rman_get_start(irq->ii_res), name); + res = irq->ii_res; + taskqgroup_attach(tqg, gtask, q, dev, res, name); GROUPTASK_INIT(&txq->ift_task, 0, _task_fn_tx, txq); - taskqgroup_attach(qgroup_if_io_tqg, &txq->ift_task, txq, rman_get_start(irq->ii_res), "tx"); + taskqgroup_attach(qgroup_if_io_tqg, &txq->ift_task, txq, dev, res, + "tx"); return (0); } @@ -5882,7 +5891,8 @@ void iflib_io_tqg_attach(struct grouptask *gt, void *uniq, int cpu, char *name) { - taskqgroup_attach_cpu(qgroup_if_io_tqg, gt, uniq, cpu, -1, name); + taskqgroup_attach_cpu(qgroup_if_io_tqg, gt, uniq, cpu, NULL, NULL, + name); } void @@ -5891,7 +5901,8 @@ iflib_config_gtask_init(void *ctx, struct grouptask *gtask, gtask_fn_t *fn, { GROUPTASK_INIT(gtask, 0, fn, ctx); - taskqgroup_attach(qgroup_if_config_tqg, gtask, gtask, -1, name); + taskqgroup_attach(qgroup_if_config_tqg, gtask, gtask, NULL, NULL, + name); } void diff --git a/sys/sys/_task.h b/sys/sys/_task.h index 392dc874ae0..6ee48800cad 100644 --- a/sys/sys/_task.h +++ b/sys/sys/_task.h @@ -39,12 +39,11 @@ * field of struct task and the second argument is a count of how many * times the task was enqueued before the call to taskqueue_run(). 
* - * List of locks - * (c) const after init + * List of locks + * (c) const after init * (q) taskqueue lock */ typedef void task_fn_t(void *context, int pending); -typedef void gtask_fn_t(void *context); struct task { STAILQ_ENTRY(task) ta_link; /* (q) link for queue */ @@ -54,6 +53,10 @@ struct task { void *ta_context; /* (c) argument for handler */ }; +#ifdef _KERNEL + +typedef void gtask_fn_t(void *context); + struct gtask { STAILQ_ENTRY(gtask) ta_link; /* (q) link for queue */ uint16_t ta_flags; /* (q) state flags */ @@ -62,15 +65,6 @@ struct gtask { void *ta_context; /* (c) argument for handler */ }; -struct grouptask { - struct gtask gt_task; - void *gt_taskqueue; - LIST_ENTRY(grouptask) gt_list; - void *gt_uniq; -#define GROUPTASK_NAMELEN 32 - char gt_name[GROUPTASK_NAMELEN]; - int16_t gt_irq; - int16_t gt_cpu; -}; +#endif /* _KERNEL */ #endif /* !_SYS__TASK_H_ */ diff --git a/sys/sys/gtaskqueue.h b/sys/sys/gtaskqueue.h index a36c770adb9..a03bfebc09b 100644 --- a/sys/sys/gtaskqueue.h +++ b/sys/sys/gtaskqueue.h @@ -31,20 +31,35 @@ #ifndef _SYS_GTASKQUEUE_H_ #define _SYS_GTASKQUEUE_H_ -#include #ifndef _KERNEL #error "no user-serviceable parts inside" #endif +#include +#include +#include +#include + struct gtaskqueue; -typedef void (*gtaskqueue_enqueue_fn)(void *context); /* * Taskqueue groups. Manages dynamic thread groups and irq binding for * device and other tasks. 
*/ +struct grouptask { + struct gtask gt_task; + void *gt_taskqueue; + LIST_ENTRY(grouptask) gt_list; + void *gt_uniq; +#define GROUPTASK_NAMELEN 32 + char gt_name[GROUPTASK_NAMELEN]; + device_t gt_dev; + struct resource *gt_irq; + int gt_cpu; +}; + void gtaskqueue_block(struct gtaskqueue *queue); void gtaskqueue_unblock(struct gtaskqueue *queue); @@ -55,28 +70,29 @@ void gtaskqueue_drain_all(struct gtaskqueue *queue); void grouptask_block(struct grouptask *grouptask); void grouptask_unblock(struct grouptask *grouptask); int grouptaskqueue_enqueue(struct gtaskqueue *queue, struct gtask *task); + void taskqgroup_attach(struct taskqgroup *qgroup, struct grouptask *grptask, - void *uniq, int irq, const char *name); -int taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *grptask, - void *uniq, int cpu, int irq, const char *name); + void *uniq, device_t dev, struct resource *irq, const char *name); +int taskqgroup_attach_cpu(struct taskqgroup *qgroup, + struct grouptask *grptask, void *uniq, int cpu, device_t dev, + struct resource *irq, const char *name); void taskqgroup_detach(struct taskqgroup *qgroup, struct grouptask *gtask); struct taskqgroup *taskqgroup_create(const char *name); void taskqgroup_destroy(struct taskqgroup *qgroup); int taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride); -void taskqgroup_config_gtask_init(void *ctx, struct grouptask *gtask, gtask_fn_t *fn, - const char *name); +void taskqgroup_config_gtask_init(void *ctx, struct grouptask *gtask, + gtask_fn_t *fn, const char *name); void taskqgroup_config_gtask_deinit(struct grouptask *gtask); #define TASK_ENQUEUED 0x1 #define TASK_SKIP_WAKEUP 0x2 #define TASK_NOENQUEUE 0x4 - -#define GTASK_INIT(task, flags, priority, func, context) do { \ - (task)->ta_flags = flags; \ - (task)->ta_priority = (priority); \ - (task)->ta_func = (func); \ - (task)->ta_context = (context); \ +#define GTASK_INIT(gtask, flags, priority, func, context) do { \ + (gtask)->ta_flags = flags; \ + 
(gtask)->ta_priority = (priority); \ + (gtask)->ta_func = (func); \ + (gtask)->ta_context = (context); \ } while (0) #define GROUPTASK_INIT(gtask, priority, func, context) \ diff --git a/sys/sys/param.h b/sys/sys/param.h index d4bc7fe0c2c..531b09bc576 100644 --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -60,7 +60,7 @@ * in the range 5 to 9. */ #undef __FreeBSD_version -#define __FreeBSD_version 1300011 /* Master, propagated to newvers */ +#define __FreeBSD_version 1300012 /* Master, propagated to newvers */ /* * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD, From fbb1b16ad8bc7a6f9a32424ddbd61e59ad5a4d4d Mon Sep 17 00:00:00 2001 From: Martin Matuska Date: Tue, 12 Feb 2019 22:29:41 +0000 Subject: [PATCH 56/93] Update vendor/libarchive/dist to git 31c0a517c91f44eeee717a04db8b075cadda83d8 Relevant vendor changes: PR #1085: Fix a null pointer dereference bug in zip writer PR #1110: ZIP reader added support for XZ, LZMA, PPMD8 and BZIP2 decompression PR #1116: Add support for 64-bit ar format PR #1120: Fix a 7zip crash [1] and an ISO9660 infinite loop [2] PR #1125: RAR5 reader - fix an invalid read and a memory leak PR #1131: POSIX reader - do not fail when tree_current_lstat() fails due to ENOENT [3] PR #1134: Delete unnecessary null pointer checks before calls of free() OSS-Fuzz 10843: Force intermediate to uint64_t to make UBSAN happy. 
OSS-Fuzz 11011: Avoid buffer overflow in rar5 reader PR: 233006 [3] Security: CVE-2019-1000019 [1], CVE-2019-1000020 [2] --- .cirrus.yml | 26 + .travis.yml | 30 - CMakeLists.txt | 26 +- Makefile.am | 4 +- build/{ci_build.sh => ci/build.sh} | 35 +- build/ci/cirrus_ci.sh | 53 + build/{ci_test_driver => ci/test_driver} | 0 contrib/shar/tree.c | 3 +- cpio/test/test_option_t.c | 8 +- libarchive/CMakeLists.txt | 2 + libarchive/archive_acl.c | 24 +- libarchive/archive_disk_acl_sunos.c | 6 +- libarchive/archive_entry.c | 18 +- libarchive/archive_pack_dev.c | 3 + libarchive/archive_ppmd8.c | 1287 +++++++++++++++++ libarchive/archive_ppmd8_private.h | 148 ++ libarchive/archive_read_disk_posix.c | 30 +- libarchive/archive_read_open_file.c | 3 +- libarchive/archive_read_support_format_7zip.c | 11 +- libarchive/archive_read_support_format_ar.c | 8 +- libarchive/archive_read_support_format_cpio.c | 3 +- .../archive_read_support_format_iso9660.c | 17 +- libarchive/archive_read_support_format_rar5.c | 49 +- libarchive/archive_read_support_format_xar.c | 3 +- libarchive/archive_read_support_format_zip.c | 820 ++++++++++- libarchive/archive_write_disk_posix.c | 9 +- .../archive_write_disk_set_standard_lookup.c | 6 +- libarchive/archive_write_disk_windows.c | 15 +- libarchive/archive_write_set_format_ar.c | 5 + libarchive/archive_write_set_format_cpio.c | 3 +- .../archive_write_set_format_cpio_newc.c | 3 +- libarchive/archive_write_set_format_gnutar.c | 3 +- libarchive/archive_write_set_format_shar.c | 6 +- libarchive/archive_write_set_format_ustar.c | 9 +- libarchive/archive_write_set_format_v7tar.c | 9 +- libarchive/archive_write_set_format_zip.c | 9 +- libarchive/test/CMakeLists.txt | 1 - .../test/test_compat_pax_libarchive_2x.c | 153 -- .../test_compat_pax_libarchive_2x.tar.Z.uu | 15 - libarchive/test/test_read_format_zip.c | 447 ++++++ .../test/test_read_format_zip_bzip2.zipx.uu | 19 + .../test_read_format_zip_bzip2_multi.zipx.uu | 96 ++ 
.../test/test_read_format_zip_lzma.zipx.uu | 19 + .../test_read_format_zip_lzma_multi.zipx.uu | 95 ++ .../test/test_read_format_zip_ppmd8.zipx.uu | 17 + .../test_read_format_zip_ppmd8_multi.zipx.uu | 84 ++ .../test_read_format_zip_xz_multi.zipx.uu | 125 ++ test_utils/test_main.c | 6 +- 48 files changed, 3373 insertions(+), 398 deletions(-) create mode 100644 .cirrus.yml delete mode 100644 .travis.yml rename build/{ci_build.sh => ci/build.sh} (72%) create mode 100755 build/ci/cirrus_ci.sh rename build/{ci_test_driver => ci/test_driver} (100%) create mode 100644 libarchive/archive_ppmd8.c create mode 100644 libarchive/archive_ppmd8_private.h delete mode 100644 libarchive/test/test_compat_pax_libarchive_2x.c delete mode 100644 libarchive/test/test_compat_pax_libarchive_2x.tar.Z.uu create mode 100644 libarchive/test/test_read_format_zip_bzip2.zipx.uu create mode 100644 libarchive/test/test_read_format_zip_bzip2_multi.zipx.uu create mode 100644 libarchive/test/test_read_format_zip_lzma.zipx.uu create mode 100644 libarchive/test/test_read_format_zip_lzma_multi.zipx.uu create mode 100644 libarchive/test/test_read_format_zip_ppmd8.zipx.uu create mode 100644 libarchive/test/test_read_format_zip_ppmd8_multi.zipx.uu create mode 100644 libarchive/test/test_read_format_zip_xz_multi.zipx.uu diff --git a/.cirrus.yml b/.cirrus.yml new file mode 100644 index 00000000000..28d38213f95 --- /dev/null +++ b/.cirrus.yml @@ -0,0 +1,26 @@ +env: + CIRRUS_CLONE_DEPTH: 1 + ARCH: amd64 + +task: + matrix: + container: + image: fedora:29 + freebsd_instance: + image: freebsd-12-0-release-amd64 + freebsd_instance: + image: freebsd-11-2-release-amd64 + osx_instance: + image: mojave-xcode-10.1 + osx_instance: + image: high-sierra-xcode-10.0 + matrix: + env: + BS: autotools + env: + BS: cmake + install_script: + - ./build/ci/cirrus_ci.sh install + script: + - ./build/ci/build.sh + - ./build/ci/cirrus_ci.sh test diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 
9f262b64a1c..00000000000 --- a/.travis.yml +++ /dev/null @@ -1,30 +0,0 @@ -language: C -sudo: false -dist: xenial -addons: - apt: - packages: - - libacl1-dev - - libbz2-dev - - liblzma-dev - - libzip-dev - - lzop -os: - - linux - - osx -compiler: - - gcc - - clang -env: - - BUILD_SYSTEM=cmake - - BUILD_SYSTEM=autotools -matrix: - exclude: - - os: osx - compiler: gcc -before_install: - - if [ `uname` = "Darwin" ]; then brew update; fi -install: - - if [ `uname` = "Darwin" ]; then brew install xz lz4 zstd; fi -script: - - build/ci_build.sh diff --git a/CMakeLists.txt b/CMakeLists.txt index 81aa1aebe73..bd609eb408e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -183,6 +183,7 @@ OPTION(ENABLE_LIBB2 "Enable the use of the system LIBB2 library if found" ON) OPTION(ENABLE_LZ4 "Enable the use of the system LZ4 library if found" ON) OPTION(ENABLE_LZO "Enable the use of the system LZO library if found" OFF) OPTION(ENABLE_LZMA "Enable the use of the system LZMA library if found" ON) +OPTION(ENABLE_ZSTD "Enable the use of the system zstd library if found" ON) OPTION(ENABLE_ZLIB "Enable the use of the system ZLIB library if found" ON) OPTION(ENABLE_BZip2 "Enable the use of the system BZip2 library if found" ON) @@ -458,7 +459,7 @@ MARK_AS_ADVANCED(CLEAR BZIP2_LIBRARIES) IF(ENABLE_LZMA) FIND_PACKAGE(LibLZMA) ELSE() - SET(LIBZMA_FOUND FALSE) # Override cached value + SET(LIBLZMA_FOUND FALSE) # Override cached value ENDIF() IF(LIBLZMA_FOUND) @@ -480,6 +481,9 @@ IF(LIBLZMA_FOUND) ELSE(LIBLZMA_FOUND) # LZMA not found and will not be used. 
ENDIF(LIBLZMA_FOUND) +MARK_AS_ADVANCED(CLEAR LIBLZMA_INCLUDE_DIR) +MARK_AS_ADVANCED(CLEAR LIBLZMA_LIBRARY) + # # Find LZO2 # @@ -569,15 +573,19 @@ MARK_AS_ADVANCED(CLEAR LZ4_LIBRARY) # # Find Zstd # -IF (ZSTD_INCLUDE_DIR) - # Already in cache, be silent - SET(ZSTD_FIND_QUIETLY TRUE) -ENDIF (ZSTD_INCLUDE_DIR) +IF(ENABLE_ZSTD) + IF (ZSTD_INCLUDE_DIR) + # Already in cache, be silent + SET(ZSTD_FIND_QUIETLY TRUE) + ENDIF (ZSTD_INCLUDE_DIR) -FIND_PATH(ZSTD_INCLUDE_DIR zstd.h) -FIND_LIBRARY(ZSTD_LIBRARY NAMES zstd libzstd) -INCLUDE(FindPackageHandleStandardArgs) -FIND_PACKAGE_HANDLE_STANDARD_ARGS(ZSTD DEFAULT_MSG ZSTD_LIBRARY ZSTD_INCLUDE_DIR) + FIND_PATH(ZSTD_INCLUDE_DIR zstd.h) + FIND_LIBRARY(ZSTD_LIBRARY NAMES zstd libzstd) + INCLUDE(FindPackageHandleStandardArgs) + FIND_PACKAGE_HANDLE_STANDARD_ARGS(ZSTD DEFAULT_MSG ZSTD_LIBRARY ZSTD_INCLUDE_DIR) +ELSE(ENABLE_ZSTD) + SET(ZSTD_FOUND FALSE) # Override cached value +ENDIF(ENABLE_ZSTD) IF(ZSTD_FOUND) SET(HAVE_ZSTD_H 1) INCLUDE_DIRECTORIES(${ZSTD_INCLUDE_DIR}) diff --git a/Makefile.am b/Makefile.am index 80a4e17d1b7..0e8056be1ac 100644 --- a/Makefile.am +++ b/Makefile.am @@ -132,6 +132,8 @@ libarchive_la_SOURCES= \ libarchive/archive_ppmd_private.h \ libarchive/archive_ppmd7.c \ libarchive/archive_ppmd7_private.h \ + libarchive/archive_ppmd8.c \ + libarchive/archive_ppmd8_private.h \ libarchive/archive_private.h \ libarchive/archive_random.c \ libarchive/archive_random_private.h \ @@ -400,7 +402,6 @@ libarchive_test_SOURCES= \ libarchive/test/test_compat_lzma.c \ libarchive/test/test_compat_lzop.c \ libarchive/test/test_compat_mac.c \ - libarchive/test/test_compat_pax_libarchive_2x.c \ libarchive/test/test_compat_perl_archive_tar.c \ libarchive/test/test_compat_plexus_archiver_tar.c \ libarchive/test/test_compat_solaris_tar_acl.c \ @@ -671,7 +672,6 @@ libarchive_test_EXTRA_DIST=\ libarchive/test/test_compat_lzop_3.tar.lzo.uu \ libarchive/test/test_compat_mac-1.tar.Z.uu \ libarchive/test/test_compat_mac-2.tar.Z.uu \ - 
libarchive/test/test_compat_pax_libarchive_2x.tar.Z.uu \ libarchive/test/test_compat_perl_archive_tar.tar.uu \ libarchive/test/test_compat_plexus_archiver_tar.tar.uu \ libarchive/test/test_compat_solaris_pax_sparse_1.pax.Z.uu \ diff --git a/build/ci_build.sh b/build/ci/build.sh similarity index 72% rename from build/ci_build.sh rename to build/ci/build.sh index 65e5ceb5447..d61336eafb0 100755 --- a/build/ci_build.sh +++ b/build/ci/build.sh @@ -3,15 +3,17 @@ # Automated build and test of libarchive on CI systems # # Variables that can be passed via environment: -# BUILD_SYSTEM= -# BUILDDIR= -# SRCDIR= -# CONFIGURE_ARGS= -# MAKE_ARGS= -# +# BS= # build system (autotools or cmake) +# BUILDDIR= # build directory +# SRCDIR= # source directory +# CONFIGURE_ARGS= # configure arguments +# MAKE_ARGS= # make arguments ACTIONS= -BUILD_SYSTEM="${BUILD_SYSTEM:-autotools}" +if [ -n "${BUILD_SYSTEM}" ]; then + BS="${BUILD_SYSTEM}" +fi +BS="${BS:-autotools}" MAKE="${MAKE:-make}" CMAKE="${CMAKE:-cmake}" CURDIR=`pwd` @@ -38,8 +40,8 @@ while getopts a:b:d:s: opt; do esac ACTIONS="${ACTIONS} ${OPTARG}" ;; - b) BUILD_SYSTEM="${OPTARG}" - case "${BUILD_SYSTEM}" in + b) BS="${OPTARG}" + case "${BS}" in autotools) ;; cmake) ;; *) inputerror "Invalid build system (-b)" ;; @@ -59,18 +61,18 @@ done if [ -z "${ACTIONS}" ]; then ACTIONS="autogen configure build test" fi -if [ -z "${BUILD_SYSTEM}" ]; then - inputerror "Missing type (-t) parameter" +if [ -z "${BS}" ]; then + inputerror "Missing build system (-b) parameter" fi if [ -z "${BUILDDIR}" ]; then - BUILDDIR="${CURDIR}/build_ci/${BUILD_SYSTEM}" + BUILDDIR="${CURDIR}/build_ci/${BS}" fi mkdir -p "${BUILDDIR}" for action in ${ACTIONS}; do cd "${BUILDDIR}" case "${action}" in autogen) - case "${BUILD_SYSTEM}" in + case "${BS}" in autotools) cd "${SRCDIR}" sh build/autogen.sh @@ -79,7 +81,7 @@ for action in ${ACTIONS}; do esac ;; configure) - case "${BUILD_SYSTEM}" in + case "${BS}" in autotools) "${SRCDIR}/configure" ${CONFIGURE_ARGS} ;; 
cmake) ${CMAKE} ${CONFIGURE_ARGS} "${SRCDIR}" ;; esac @@ -90,15 +92,16 @@ for action in ${ACTIONS}; do RET="$?" ;; test) - case "${BUILD_SYSTEM}" in + case "${BS}" in autotools) - ${MAKE} ${MAKE_ARGS} check LOG_DRIVER="${SRCDIR}/build/ci_test_driver" + ${MAKE} ${MAKE_ARGS} check LOG_DRIVER="${SRCDIR}/build/ci/test_driver" ;; cmake) ${MAKE} ${MAKE_ARGS} test ;; esac RET="$?" + find ${TMPDIR:-/tmp} -path '*_test.*' -name '*.log' -print -exec cat {} \; ;; esac if [ "${RET}" != "0" ]; then diff --git a/build/ci/cirrus_ci.sh b/build/ci/cirrus_ci.sh new file mode 100755 index 00000000000..9db762f97cc --- /dev/null +++ b/build/ci/cirrus_ci.sh @@ -0,0 +1,53 @@ +#!/bin/sh +UNAME=`uname` +if [ "$1" = "install" ] +then + if [ "${UNAME}" = "FreeBSD" ] + then + set -x -e + sed -i.bak -e 's,pkg+http://pkg.FreeBSD.org/\${ABI}/quarterly,pkg+http://pkg.FreeBSD.org/\${ABI}/latest,' /etc/pkg/FreeBSD.conf + mount -u -o acls / + mkdir /tmp_acl_nfsv4 + MD=`mdconfig -a -t swap -s 128M` + newfs /dev/$MD + tunefs -N enable /dev/$MD + mount /dev/$MD /tmp_acl_nfsv4 + chmod 1777 /tmp_acl_nfsv4 + pkg install -y autoconf automake cmake libiconv libtool pkgconf expat libxml2 liblz4 zstd + elif [ "${UNAME}" = "Darwin" ] + then + set -x -e + brew update + brew install autoconf automake libtool pkg-config cmake xz lz4 zstd + elif [ "${UNAME}" = "Linux" ] + then + if [ -f "/etc/debian_version" ] + then + apt-get -y update + apt-get -y install build-essential locales automake libtool bison sharutils pkgconf libacl1-dev libbz2-dev libzip-dev zlib1g-dev liblzma-dev liblz4-dev libzstd-dev libssl-dev lrzip cmake + elif [ -f "/etc/fedora-release" ] + then + dnf -y install make cmake gcc gcc-c++ kernel-devel automake libtool bison sharutils pkgconf libacl-devel librichacl-devel bzip2-devel libzip-devel zlib-devel xz-devel lz4-devel libzstd-devel openssl-devel + fi + fi +elif [ "$1" = "test" ] +then + if [ "${UNAME}" = "FreeBSD" ] + then + set -e + echo "Additional NFSv4 ACL tests" + CURDIR=`pwd` + if [ 
"${BS}" = "cmake" ] + then + BIN_SUBDIR="bin" + else + BIN_SUBDIR=. + fi + BUILDDIR="${CURDIR}/build_ci/${BS}" + cd "$BUILDDIR" + TMPDIR=/tmp_acl_nfsv4 ${BIN_SUBDIR}/libarchive_test -r "${CURDIR}/libarchive/test" -v test_acl_platform_nfs4 + fi +else + echo "Usage $0 install | test_nfsv4_acls" + exit 1 +fi diff --git a/build/ci_test_driver b/build/ci/test_driver similarity index 100% rename from build/ci_test_driver rename to build/ci/test_driver diff --git a/contrib/shar/tree.c b/contrib/shar/tree.c index d5a04abf5f4..a80d8366bde 100644 --- a/contrib/shar/tree.c +++ b/contrib/shar/tree.c @@ -530,8 +530,7 @@ tree_close(struct tree *t) /* Release anything remaining in the stack. */ while (t->stack != NULL) tree_pop(t); - if (t->buff) - free(t->buff); + free(t->buff); /* chdir() back to where we started. */ if (t->initialDirFd >= 0) { fchdir(t->initialDirFd); diff --git a/cpio/test/test_option_t.c b/cpio/test/test_option_t.c index 6bcaee3c87c..eaa73fa3a01 100644 --- a/cpio/test/test_option_t.c +++ b/cpio/test/test_option_t.c @@ -88,11 +88,11 @@ DEFINE_TEST(test_option_t) setlocale(LC_ALL, ""); #endif #if defined(_WIN32) && !defined(__CYGWIN__) - strftime(date2, sizeof(date), "%b %d %Y", localtime(&mtime)); - _snprintf(date, sizeof(date)-1, "%12s file", date2); + strftime(date2, sizeof(date2)-1, "%b %d %Y", localtime(&mtime)); + _snprintf(date, sizeof(date)-1, "%12.12s file", date2); #else - strftime(date2, sizeof(date), "%b %e %Y", localtime(&mtime)); - snprintf(date, sizeof(date)-1, "%12s file", date2); + strftime(date2, sizeof(date2)-1, "%b %e %Y", localtime(&mtime)); + snprintf(date, sizeof(date)-1, "%12.12s file", date2); #endif assertEqualMem(p + 42, date, strlen(date)); free(p); diff --git a/libarchive/CMakeLists.txt b/libarchive/CMakeLists.txt index 79719773cd5..8e86aade226 100644 --- a/libarchive/CMakeLists.txt +++ b/libarchive/CMakeLists.txt @@ -51,6 +51,8 @@ SET(libarchive_SOURCES archive_platform_acl.h archive_platform_xattr.h archive_ppmd_private.h + 
archive_ppmd8.c + archive_ppmd8_private.h archive_ppmd7.c archive_ppmd7_private.h archive_private.h diff --git a/libarchive/archive_acl.c b/libarchive/archive_acl.c index 7beeee86efe..952e20df4dc 100644 --- a/libarchive/archive_acl.c +++ b/libarchive/archive_acl.c @@ -138,14 +138,10 @@ archive_acl_clear(struct archive_acl *acl) free(acl->acl_head); acl->acl_head = ap; } - if (acl->acl_text_w != NULL) { - free(acl->acl_text_w); - acl->acl_text_w = NULL; - } - if (acl->acl_text != NULL) { - free(acl->acl_text); - acl->acl_text = NULL; - } + free(acl->acl_text_w); + acl->acl_text_w = NULL; + free(acl->acl_text); + acl->acl_text = NULL; acl->acl_p = NULL; acl->acl_types = 0; acl->acl_state = 0; /* Not counting. */ @@ -324,14 +320,10 @@ acl_new_entry(struct archive_acl *acl, return (NULL); } - if (acl->acl_text_w != NULL) { - free(acl->acl_text_w); - acl->acl_text_w = NULL; - } - if (acl->acl_text != NULL) { - free(acl->acl_text); - acl->acl_text = NULL; - } + free(acl->acl_text_w); + acl->acl_text_w = NULL; + free(acl->acl_text); + acl->acl_text = NULL; /* * If there's a matching entry already in the list, overwrite it. 
diff --git a/libarchive/archive_disk_acl_sunos.c b/libarchive/archive_disk_acl_sunos.c index bc84fd6782f..b0f5dfad9b1 100644 --- a/libarchive/archive_disk_acl_sunos.c +++ b/libarchive/archive_disk_acl_sunos.c @@ -145,10 +145,8 @@ sunacl_get(int cmd, int *aclcnt, int fd, const char *path) cnt = facl(fd, cmd, cnt, aclp); } } else { - if (aclp != NULL) { - free(aclp); - aclp = NULL; - } + free(aclp); + aclp = NULL; break; } } diff --git a/libarchive/archive_entry.c b/libarchive/archive_entry.c index f722bbe85c7..5125a2eb284 100644 --- a/libarchive/archive_entry.c +++ b/libarchive/archive_entry.c @@ -1560,10 +1560,8 @@ archive_entry_acl_text_compat(int *flags) const wchar_t * archive_entry_acl_text_w(struct archive_entry *entry, int flags) { - if (entry->acl.acl_text_w != NULL) { - free(entry->acl.acl_text_w); - entry->acl.acl_text_w = NULL; - } + free(entry->acl.acl_text_w); + entry->acl.acl_text_w = NULL; if (archive_entry_acl_text_compat(&flags) == 0) entry->acl.acl_text_w = archive_acl_to_text_w(&entry->acl, NULL, flags, entry->archive); @@ -1574,10 +1572,8 @@ archive_entry_acl_text_w(struct archive_entry *entry, int flags) const char * archive_entry_acl_text(struct archive_entry *entry, int flags) { - if (entry->acl.acl_text != NULL) { - free(entry->acl.acl_text); - entry->acl.acl_text = NULL; - } + free(entry->acl.acl_text); + entry->acl.acl_text = NULL; if (archive_entry_acl_text_compat(&flags) == 0) entry->acl.acl_text = archive_acl_to_text_l(&entry->acl, NULL, flags, NULL); @@ -1590,10 +1586,8 @@ int _archive_entry_acl_text_l(struct archive_entry *entry, int flags, const char **acl_text, size_t *len, struct archive_string_conv *sc) { - if (entry->acl.acl_text != NULL) { - free(entry->acl.acl_text); - entry->acl.acl_text = NULL; - } + free(entry->acl.acl_text); + entry->acl.acl_text = NULL; if (archive_entry_acl_text_compat(&flags) == 0) entry->acl.acl_text = archive_acl_to_text_l(&entry->acl, diff --git a/libarchive/archive_pack_dev.c 
b/libarchive/archive_pack_dev.c index 53bddd790a3..a5e57ac209d 100644 --- a/libarchive/archive_pack_dev.c +++ b/libarchive/archive_pack_dev.c @@ -60,6 +60,9 @@ __RCSID("$NetBSD$"); #ifdef HAVE_SYS_SYSMACROS_H #include #endif +#ifdef HAVE_SYS_MKDEV_H +#include +#endif #ifdef HAVE_UNISTD_H #include #endif diff --git a/libarchive/archive_ppmd8.c b/libarchive/archive_ppmd8.c new file mode 100644 index 00000000000..d1779395dac --- /dev/null +++ b/libarchive/archive_ppmd8.c @@ -0,0 +1,1287 @@ +/* Ppmd8.c -- PPMdI codec +2016-05-21 : Igor Pavlov : Public domain +This code is based on PPMd var.I (2002): Dmitry Shkarin : Public domain */ + +#include "archive_platform.h" + +#include + +#include "archive_ppmd8_private.h" + +const Byte PPMD8_kExpEscape[16] = { 25, 14, 9, 7, 5, 5, 4, 4, 4, 3, 3, 3, 2, 2, 2, 2 }; +static const UInt16 kInitBinEsc[] = { 0x3CDD, 0x1F3F, 0x59BF, 0x48F3, 0x64A1, 0x5ABC, 0x6632, 0x6051}; + +#define MAX_FREQ 124 +#define UNIT_SIZE 12 + +#define U2B(nu) ((UInt32)(nu) * UNIT_SIZE) +#define U2I(nu) (p->Units2Indx[(nu) - 1]) +#define I2U(indx) (p->Indx2Units[indx]) + +#ifdef PPMD_32BIT + #define REF(ptr) (ptr) +#else + #define REF(ptr) ((UInt32)((Byte *)(ptr) - (p)->Base)) +#endif + +#define STATS_REF(ptr) ((CPpmd_State_Ref)REF(ptr)) + +#define CTX(ref) ((CPpmd8_Context *)Ppmd8_GetContext(p, ref)) +#define STATS(ctx) Ppmd8_GetStats(p, ctx) +#define ONE_STATE(ctx) Ppmd8Context_OneState(ctx) +#define SUFFIX(ctx) CTX((ctx)->Suffix) + +#define kTop (1 << 24) +#define kBot (1 << 15) + +typedef CPpmd8_Context * CTX_PTR; + +struct CPpmd8_Node_; + +typedef + #ifdef PPMD_32BIT + struct CPpmd8_Node_ * + #else + UInt32 + #endif + CPpmd8_Node_Ref; + +typedef struct CPpmd8_Node_ +{ + UInt32 Stamp; + CPpmd8_Node_Ref Next; + UInt32 NU; +} CPpmd8_Node; + +#ifdef PPMD_32BIT + #define NODE(ptr) (ptr) +#else + #define NODE(offs) ((CPpmd8_Node *)(p->Base + (offs))) +#endif + +#define EMPTY_NODE 0xFFFFFFFF + +void Ppmd8_Construct(CPpmd8 *p) +{ + unsigned i, k, m; + + p->Base = 
0; + + for (i = 0, k = 0; i < PPMD_NUM_INDEXES; i++) + { + unsigned step = (i >= 12 ? 4 : (i >> 2) + 1); + do { p->Units2Indx[k++] = (Byte)i; } while (--step); + p->Indx2Units[i] = (Byte)k; + } + + p->NS2BSIndx[0] = (0 << 1); + p->NS2BSIndx[1] = (1 << 1); + memset(p->NS2BSIndx + 2, (2 << 1), 9); + memset(p->NS2BSIndx + 11, (3 << 1), 256 - 11); + + for (i = 0; i < 5; i++) + p->NS2Indx[i] = (Byte)i; + for (m = i, k = 1; i < 260; i++) + { + p->NS2Indx[i] = (Byte)m; + if (--k == 0) + k = (++m) - 4; + } +} + +void Ppmd8_Free(CPpmd8 *p) +{ + free(p->Base); + p->Size = 0; + p->Base = 0; +} + +Bool Ppmd8_Alloc(CPpmd8 *p, UInt32 size) +{ + if (p->Base == 0 || p->Size != size) + { + Ppmd8_Free(p); + p->AlignOffset = + #ifdef PPMD_32BIT + (4 - size) & 3; + #else + 4 - (size & 3); + #endif + if ((p->Base = (Byte *)malloc(p->AlignOffset + size)) == 0) + return False; + p->Size = size; + } + return True; +} + +static void InsertNode(CPpmd8 *p, void *node, unsigned indx) +{ + ((CPpmd8_Node *)node)->Stamp = EMPTY_NODE; + ((CPpmd8_Node *)node)->Next = (CPpmd8_Node_Ref)p->FreeList[indx]; + ((CPpmd8_Node *)node)->NU = I2U(indx); + p->FreeList[indx] = REF(node); + p->Stamps[indx]++; +} + +static void *RemoveNode(CPpmd8 *p, unsigned indx) +{ + CPpmd8_Node *node = NODE((CPpmd8_Node_Ref)p->FreeList[indx]); + p->FreeList[indx] = node->Next; + p->Stamps[indx]--; + return node; +} + +static void SplitBlock(CPpmd8 *p, void *ptr, unsigned oldIndx, unsigned newIndx) +{ + unsigned i, nu = I2U(oldIndx) - I2U(newIndx); + ptr = (Byte *)ptr + U2B(I2U(newIndx)); + if (I2U(i = U2I(nu)) != nu) + { + unsigned k = I2U(--i); + InsertNode(p, ((Byte *)ptr) + U2B(k), nu - k - 1); + } + InsertNode(p, ptr, i); +} + +static void GlueFreeBlocks(CPpmd8 *p) +{ + CPpmd8_Node_Ref head = 0; + CPpmd8_Node_Ref *prev = &head; + unsigned i; + + p->GlueCount = 1 << 13; + memset(p->Stamps, 0, sizeof(p->Stamps)); + + /* Order-0 context is always at top UNIT, so we don't need guard NODE at the end. 
+ All blocks up to p->LoUnit can be free, so we need guard NODE at LoUnit. */ + if (p->LoUnit != p->HiUnit) + ((CPpmd8_Node *)p->LoUnit)->Stamp = 0; + + /* Glue free blocks */ + for (i = 0; i < PPMD_NUM_INDEXES; i++) + { + CPpmd8_Node_Ref next = (CPpmd8_Node_Ref)p->FreeList[i]; + p->FreeList[i] = 0; + while (next != 0) + { + CPpmd8_Node *node = NODE(next); + if (node->NU != 0) + { + CPpmd8_Node *node2; + *prev = next; + prev = &(node->Next); + while ((node2 = node + node->NU)->Stamp == EMPTY_NODE) + { + node->NU += node2->NU; + node2->NU = 0; + } + } + next = node->Next; + } + } + *prev = 0; + + /* Fill lists of free blocks */ + while (head != 0) + { + CPpmd8_Node *node = NODE(head); + unsigned nu; + head = node->Next; + nu = node->NU; + if (nu == 0) + continue; + for (; nu > 128; nu -= 128, node += 128) + InsertNode(p, node, PPMD_NUM_INDEXES - 1); + if (I2U(i = U2I(nu)) != nu) + { + unsigned k = I2U(--i); + InsertNode(p, node + k, nu - k - 1); + } + InsertNode(p, node, i); + } +} + +static void *AllocUnitsRare(CPpmd8 *p, unsigned indx) +{ + unsigned i; + void *retVal; + if (p->GlueCount == 0) + { + GlueFreeBlocks(p); + if (p->FreeList[indx] != 0) + return RemoveNode(p, indx); + } + i = indx; + do + { + if (++i == PPMD_NUM_INDEXES) + { + UInt32 numBytes = U2B(I2U(indx)); + p->GlueCount--; + return ((UInt32)(p->UnitsStart - p->Text) > numBytes) ? 
(p->UnitsStart -= numBytes) : (NULL); + } + } + while (p->FreeList[i] == 0); + retVal = RemoveNode(p, i); + SplitBlock(p, retVal, i, indx); + return retVal; +} + +static void *AllocUnits(CPpmd8 *p, unsigned indx) +{ + UInt32 numBytes; + if (p->FreeList[indx] != 0) + return RemoveNode(p, indx); + numBytes = U2B(I2U(indx)); + if (numBytes <= (UInt32)(p->HiUnit - p->LoUnit)) + { + void *retVal = p->LoUnit; + p->LoUnit += numBytes; + return retVal; + } + return AllocUnitsRare(p, indx); +} + +#define MyMem12Cpy(dest, src, num) \ + { UInt32 *d = (UInt32 *)dest; const UInt32 *z = (const UInt32 *)src; UInt32 n = num; \ + do { d[0] = z[0]; d[1] = z[1]; d[2] = z[2]; z += 3; d += 3; } while (--n); } + +static void *ShrinkUnits(CPpmd8 *p, void *oldPtr, unsigned oldNU, unsigned newNU) +{ + unsigned i0 = U2I(oldNU); + unsigned i1 = U2I(newNU); + if (i0 == i1) + return oldPtr; + if (p->FreeList[i1] != 0) + { + void *ptr = RemoveNode(p, i1); + MyMem12Cpy(ptr, oldPtr, newNU); + InsertNode(p, oldPtr, i0); + return ptr; + } + SplitBlock(p, oldPtr, i0, i1); + return oldPtr; +} + +static void FreeUnits(CPpmd8 *p, void *ptr, unsigned nu) +{ + InsertNode(p, ptr, U2I(nu)); +} + +static void SpecialFreeUnit(CPpmd8 *p, void *ptr) +{ + if ((Byte *)ptr != p->UnitsStart) + InsertNode(p, ptr, 0); + else + { + #ifdef PPMD8_FREEZE_SUPPORT + *(UInt32 *)ptr = EMPTY_NODE; /* it's used for (Flags == 0xFF) check in RemoveBinContexts */ + #endif + p->UnitsStart += UNIT_SIZE; + } +} + +static void *MoveUnitsUp(CPpmd8 *p, void *oldPtr, unsigned nu) +{ + unsigned indx = U2I(nu); + void *ptr; + if ((Byte *)oldPtr > p->UnitsStart + 16 * 1024 || REF(oldPtr) > p->FreeList[indx]) + return oldPtr; + ptr = RemoveNode(p, indx); + MyMem12Cpy(ptr, oldPtr, nu); + if ((Byte*)oldPtr != p->UnitsStart) + InsertNode(p, oldPtr, indx); + else + p->UnitsStart += U2B(I2U(indx)); + return ptr; +} + +static void ExpandTextArea(CPpmd8 *p) +{ + UInt32 count[PPMD_NUM_INDEXES]; + unsigned i; + memset(count, 0, sizeof(count)); + if 
(p->LoUnit != p->HiUnit) + ((CPpmd8_Node *)p->LoUnit)->Stamp = 0; + + { + CPpmd8_Node *node = (CPpmd8_Node *)p->UnitsStart; + for (; node->Stamp == EMPTY_NODE; node += node->NU) + { + node->Stamp = 0; + count[U2I(node->NU)]++; + } + p->UnitsStart = (Byte *)node; + } + + for (i = 0; i < PPMD_NUM_INDEXES; i++) + { + CPpmd8_Node_Ref *next = (CPpmd8_Node_Ref *)&p->FreeList[i]; + while (count[i] != 0) + { + CPpmd8_Node *node = NODE(*next); + while (node->Stamp == 0) + { + *next = node->Next; + node = NODE(*next); + p->Stamps[i]--; + if (--count[i] == 0) + break; + } + next = &node->Next; + } + } +} + +#define SUCCESSOR(p) ((CPpmd_Void_Ref)((p)->SuccessorLow | ((UInt32)(p)->SuccessorHigh << 16))) + +static void SetSuccessor(CPpmd_State *p, CPpmd_Void_Ref v) +{ + (p)->SuccessorLow = (UInt16)((UInt32)(v) & 0xFFFF); + (p)->SuccessorHigh = (UInt16)(((UInt32)(v) >> 16) & 0xFFFF); +} + +#define RESET_TEXT(offs) { p->Text = p->Base + p->AlignOffset + (offs); } + +static void RestartModel(CPpmd8 *p) +{ + unsigned i, k, m, r; + + memset(p->FreeList, 0, sizeof(p->FreeList)); + memset(p->Stamps, 0, sizeof(p->Stamps)); + RESET_TEXT(0); + p->HiUnit = p->Text + p->Size; + p->LoUnit = p->UnitsStart = p->HiUnit - p->Size / 8 / UNIT_SIZE * 7 * UNIT_SIZE; + p->GlueCount = 0; + + p->OrderFall = p->MaxOrder; + p->RunLength = p->InitRL = -(Int32)((p->MaxOrder < 12) ? 
p->MaxOrder : 12) - 1; + p->PrevSuccess = 0; + + p->MinContext = p->MaxContext = (CTX_PTR)(p->HiUnit -= UNIT_SIZE); /* AllocContext(p); */ + p->MinContext->Suffix = 0; + p->MinContext->NumStats = 255; + p->MinContext->Flags = 0; + p->MinContext->SummFreq = 256 + 1; + p->FoundState = (CPpmd_State *)p->LoUnit; /* AllocUnits(p, PPMD_NUM_INDEXES - 1); */ + p->LoUnit += U2B(256 / 2); + p->MinContext->Stats = REF(p->FoundState); + for (i = 0; i < 256; i++) + { + CPpmd_State *s = &p->FoundState[i]; + s->Symbol = (Byte)i; + s->Freq = 1; + SetSuccessor(s, 0); + } + + for (i = m = 0; m < 25; m++) + { + while (p->NS2Indx[i] == m) + i++; + for (k = 0; k < 8; k++) + { + UInt16 val = (UInt16)(PPMD_BIN_SCALE - kInitBinEsc[k] / (i + 1)); + UInt16 *dest = p->BinSumm[m] + k; + for (r = 0; r < 64; r += 8) + dest[r] = val; + } + } + + for (i = m = 0; m < 24; m++) + { + while (p->NS2Indx[i + 3] == m + 3) + i++; + for (k = 0; k < 32; k++) + { + CPpmd_See *s = &p->See[m][k]; + s->Summ = (UInt16)((2 * i + 5) << (s->Shift = PPMD_PERIOD_BITS - 4)); + s->Count = 7; + } + } +} + +void Ppmd8_Init(CPpmd8 *p, unsigned maxOrder, unsigned restoreMethod) +{ + p->MaxOrder = maxOrder; + p->RestoreMethod = restoreMethod; + RestartModel(p); + p->DummySee.Shift = PPMD_PERIOD_BITS; + p->DummySee.Summ = 0; /* unused */ + p->DummySee.Count = 64; /* unused */ +} + +static void Refresh(CPpmd8 *p, CTX_PTR ctx, unsigned oldNU, unsigned scale) +{ + unsigned i = ctx->NumStats, escFreq, sumFreq, flags; + CPpmd_State *s = (CPpmd_State *)ShrinkUnits(p, STATS(ctx), oldNU, (i + 2) >> 1); + ctx->Stats = REF(s); + #ifdef PPMD8_FREEZE_SUPPORT + /* fixed over Shkarin's code. Fixed code is not compatible with original code for some files in FREEZE mode. 
*/ + scale |= (ctx->SummFreq >= ((UInt32)1 << 15)); + #endif + flags = (ctx->Flags & (0x10 + 0x04 * scale)) + 0x08 * (s->Symbol >= 0x40); + escFreq = ctx->SummFreq - s->Freq; + sumFreq = (s->Freq = (Byte)((s->Freq + scale) >> scale)); + do + { + escFreq -= (++s)->Freq; + sumFreq += (s->Freq = (Byte)((s->Freq + scale) >> scale)); + flags |= 0x08 * (s->Symbol >= 0x40); + } + while (--i); + ctx->SummFreq = (UInt16)(sumFreq + ((escFreq + scale) >> scale)); + ctx->Flags = (Byte)flags; +} + +static void SwapStates(CPpmd_State *t1, CPpmd_State *t2) +{ + CPpmd_State tmp = *t1; + *t1 = *t2; + *t2 = tmp; +} + +static CPpmd_Void_Ref CutOff(CPpmd8 *p, CTX_PTR ctx, unsigned order) +{ + int i; + unsigned tmp; + CPpmd_State *s; + + if (!ctx->NumStats) + { + s = ONE_STATE(ctx); + if ((Byte *)Ppmd8_GetPtr(p, SUCCESSOR(s)) >= p->UnitsStart) + { + if (order < p->MaxOrder) + SetSuccessor(s, CutOff(p, CTX(SUCCESSOR(s)), order + 1)); + else + SetSuccessor(s, 0); + if (SUCCESSOR(s) || order <= 9) /* O_BOUND */ + return REF(ctx); + } + SpecialFreeUnit(p, ctx); + return 0; + } + + ctx->Stats = STATS_REF(MoveUnitsUp(p, STATS(ctx), tmp = ((unsigned)ctx->NumStats + 2) >> 1)); + + for (s = STATS(ctx) + (i = ctx->NumStats); s >= STATS(ctx); s--) + if ((Byte *)Ppmd8_GetPtr(p, SUCCESSOR(s)) < p->UnitsStart) + { + CPpmd_State *s2 = STATS(ctx) + (i--); + SetSuccessor(s, 0); + SwapStates(s, s2); + } + else if (order < p->MaxOrder) + SetSuccessor(s, CutOff(p, CTX(SUCCESSOR(s)), order + 1)); + else + SetSuccessor(s, 0); + + if (i != ctx->NumStats && order) + { + ctx->NumStats = (Byte)i; + s = STATS(ctx); + if (i < 0) + { + FreeUnits(p, s, tmp); + SpecialFreeUnit(p, ctx); + return 0; + } + if (i == 0) + { + ctx->Flags = (Byte)((ctx->Flags & 0x10) + 0x08 * (s->Symbol >= 0x40)); + *ONE_STATE(ctx) = *s; + FreeUnits(p, s, tmp); + /* 9.31: the code was fixed. 
It's was not BUG, if Freq <= MAX_FREQ = 124 */ + ONE_STATE(ctx)->Freq = (Byte)(((unsigned)ONE_STATE(ctx)->Freq + 11) >> 3); + } + else + Refresh(p, ctx, tmp, ctx->SummFreq > 16 * i); + } + return REF(ctx); +} + +#ifdef PPMD8_FREEZE_SUPPORT +static CPpmd_Void_Ref RemoveBinContexts(CPpmd8 *p, CTX_PTR ctx, unsigned order) +{ + CPpmd_State *s; + if (!ctx->NumStats) + { + s = ONE_STATE(ctx); + if ((Byte *)Ppmd8_GetPtr(p, SUCCESSOR(s)) >= p->UnitsStart && order < p->MaxOrder) + SetSuccessor(s, RemoveBinContexts(p, CTX(SUCCESSOR(s)), order + 1)); + else + SetSuccessor(s, 0); + /* Suffix context can be removed already, since different (high-order) + Successors may refer to same context. So we check Flags == 0xFF (Stamp == EMPTY_NODE) */ + if (!SUCCESSOR(s) && (!SUFFIX(ctx)->NumStats || SUFFIX(ctx)->Flags == 0xFF)) + { + FreeUnits(p, ctx, 1); + return 0; + } + else + return REF(ctx); + } + + for (s = STATS(ctx) + ctx->NumStats; s >= STATS(ctx); s--) + if ((Byte *)Ppmd8_GetPtr(p, SUCCESSOR(s)) >= p->UnitsStart && order < p->MaxOrder) + SetSuccessor(s, RemoveBinContexts(p, CTX(SUCCESSOR(s)), order + 1)); + else + SetSuccessor(s, 0); + + return REF(ctx); +} +#endif + +static UInt32 GetUsedMemory(const CPpmd8 *p) +{ + UInt32 v = 0; + unsigned i; + for (i = 0; i < PPMD_NUM_INDEXES; i++) + v += p->Stamps[i] * I2U(i); + return p->Size - (UInt32)(p->HiUnit - p->LoUnit) - (UInt32)(p->UnitsStart - p->Text) - U2B(v); +} + +#ifdef PPMD8_FREEZE_SUPPORT + #define RESTORE_MODEL(c1, fSuccessor) RestoreModel(p, c1, fSuccessor) +#else + #define RESTORE_MODEL(c1, fSuccessor) RestoreModel(p, c1) +#endif + +static void RestoreModel(CPpmd8 *p, CTX_PTR c1 + #ifdef PPMD8_FREEZE_SUPPORT + , CTX_PTR fSuccessor + #endif + ) +{ + CTX_PTR c; + CPpmd_State *s; + RESET_TEXT(0); + for (c = p->MaxContext; c != c1; c = SUFFIX(c)) + if (--(c->NumStats) == 0) + { + s = STATS(c); + c->Flags = (Byte)((c->Flags & 0x10) + 0x08 * (s->Symbol >= 0x40)); + *ONE_STATE(c) = *s; + SpecialFreeUnit(p, s); + 
ONE_STATE(c)->Freq = (Byte)(((unsigned)ONE_STATE(c)->Freq + 11) >> 3); + } + else + Refresh(p, c, (c->NumStats+3) >> 1, 0); + + for (; c != p->MinContext; c = SUFFIX(c)) + if (!c->NumStats) + ONE_STATE(c)->Freq = (Byte)(ONE_STATE(c)->Freq - (ONE_STATE(c)->Freq >> 1)); + else if ((c->SummFreq += 4) > 128 + 4 * c->NumStats) + Refresh(p, c, (c->NumStats + 2) >> 1, 1); + + #ifdef PPMD8_FREEZE_SUPPORT + if (p->RestoreMethod > PPMD8_RESTORE_METHOD_FREEZE) + { + p->MaxContext = fSuccessor; + p->GlueCount += !(p->Stamps[1] & 1); + } + else if (p->RestoreMethod == PPMD8_RESTORE_METHOD_FREEZE) + { + while (p->MaxContext->Suffix) + p->MaxContext = SUFFIX(p->MaxContext); + RemoveBinContexts(p, p->MaxContext, 0); + p->RestoreMethod++; + p->GlueCount = 0; + p->OrderFall = p->MaxOrder; + } + else + #endif + if (p->RestoreMethod == PPMD8_RESTORE_METHOD_RESTART || GetUsedMemory(p) < (p->Size >> 1)) + RestartModel(p); + else + { + while (p->MaxContext->Suffix) + p->MaxContext = SUFFIX(p->MaxContext); + do + { + CutOff(p, p->MaxContext, 0); + ExpandTextArea(p); + } + while (GetUsedMemory(p) > 3 * (p->Size >> 2)); + p->GlueCount = 0; + p->OrderFall = p->MaxOrder; + } +} + +static CTX_PTR CreateSuccessors(CPpmd8 *p, Bool skip, CPpmd_State *s1, CTX_PTR c) +{ + CPpmd_State upState; + Byte flags; + CPpmd_Byte_Ref upBranch = (CPpmd_Byte_Ref)SUCCESSOR(p->FoundState); + /* fixed over Shkarin's code. Maybe it could work without + 1 too. 
*/ + CPpmd_State *ps[PPMD8_MAX_ORDER + 1]; + unsigned numPs = 0; + + if (!skip) + ps[numPs++] = p->FoundState; + + while (c->Suffix) + { + CPpmd_Void_Ref successor; + CPpmd_State *s; + c = SUFFIX(c); + if (s1) + { + s = s1; + s1 = NULL; + } + else if (c->NumStats != 0) + { + for (s = STATS(c); s->Symbol != p->FoundState->Symbol; s++); + if (s->Freq < MAX_FREQ - 9) + { + s->Freq++; + c->SummFreq++; + } + } + else + { + s = ONE_STATE(c); + s->Freq = (Byte)(s->Freq + (!SUFFIX(c)->NumStats & (s->Freq < 24))); + } + successor = SUCCESSOR(s); + if (successor != upBranch) + { + c = CTX(successor); + if (numPs == 0) + return c; + break; + } + ps[numPs++] = s; + } + + upState.Symbol = *(const Byte *)Ppmd8_GetPtr(p, upBranch); + SetSuccessor(&upState, upBranch + 1); + flags = (Byte)(0x10 * (p->FoundState->Symbol >= 0x40) + 0x08 * (upState.Symbol >= 0x40)); + + if (c->NumStats == 0) + upState.Freq = ONE_STATE(c)->Freq; + else + { + UInt32 cf, s0; + CPpmd_State *s; + for (s = STATS(c); s->Symbol != upState.Symbol; s++); + cf = s->Freq - 1; + s0 = c->SummFreq - c->NumStats - cf; + upState.Freq = (Byte)(1 + ((2 * cf <= s0) ? (5 * cf > s0) : ((cf + 2 * s0 - 3) / s0))); + } + + do + { + /* Create Child */ + CTX_PTR c1; /* = AllocContext(p); */ + if (p->HiUnit != p->LoUnit) + c1 = (CTX_PTR)(p->HiUnit -= UNIT_SIZE); + else if (p->FreeList[0] != 0) + c1 = (CTX_PTR)RemoveNode(p, 0); + else + { + c1 = (CTX_PTR)AllocUnitsRare(p, 0); + if (!c1) + return NULL; + } + c1->NumStats = 0; + c1->Flags = flags; + *ONE_STATE(c1) = upState; + c1->Suffix = REF(c); + SetSuccessor(ps[--numPs], REF(c1)); + c = c1; + } + while (numPs != 0); + + return c; +} + +static CTX_PTR ReduceOrder(CPpmd8 *p, CPpmd_State *s1, CTX_PTR c) +{ + CPpmd_State *s = NULL; + CTX_PTR c1 = c; + CPpmd_Void_Ref upBranch = REF(p->Text); + + #ifdef PPMD8_FREEZE_SUPPORT + /* The BUG in Shkarin's code was fixed: ps could overflow in CUT_OFF mode. 
*/ + CPpmd_State *ps[PPMD8_MAX_ORDER + 1]; + unsigned numPs = 0; + ps[numPs++] = p->FoundState; + #endif + + SetSuccessor(p->FoundState, upBranch); + p->OrderFall++; + + for (;;) + { + if (s1) + { + c = SUFFIX(c); + s = s1; + s1 = NULL; + } + else + { + if (!c->Suffix) + { + #ifdef PPMD8_FREEZE_SUPPORT + if (p->RestoreMethod > PPMD8_RESTORE_METHOD_FREEZE) + { + do { SetSuccessor(ps[--numPs], REF(c)); } while (numPs); + RESET_TEXT(1); + p->OrderFall = 1; + } + #endif + return c; + } + c = SUFFIX(c); + if (c->NumStats) + { + if ((s = STATS(c))->Symbol != p->FoundState->Symbol) + do { s++; } while (s->Symbol != p->FoundState->Symbol); + if (s->Freq < MAX_FREQ - 9) + { + s->Freq += 2; + c->SummFreq += 2; + } + } + else + { + s = ONE_STATE(c); + s->Freq = (Byte)(s->Freq + (s->Freq < 32)); + } + } + if (SUCCESSOR(s)) + break; + #ifdef PPMD8_FREEZE_SUPPORT + ps[numPs++] = s; + #endif + SetSuccessor(s, upBranch); + p->OrderFall++; + } + + #ifdef PPMD8_FREEZE_SUPPORT + if (p->RestoreMethod > PPMD8_RESTORE_METHOD_FREEZE) + { + c = CTX(SUCCESSOR(s)); + do { SetSuccessor(ps[--numPs], REF(c)); } while (numPs); + RESET_TEXT(1); + p->OrderFall = 1; + return c; + } + else + #endif + if (SUCCESSOR(s) <= upBranch) + { + CTX_PTR successor; + CPpmd_State *s2 = p->FoundState; + p->FoundState = s; + + successor = CreateSuccessors(p, False, NULL, c); + if (successor == NULL) + SetSuccessor(s, 0); + else + SetSuccessor(s, REF(successor)); + p->FoundState = s2; + } + + if (p->OrderFall == 1 && c1 == p->MaxContext) + { + SetSuccessor(p->FoundState, SUCCESSOR(s)); + p->Text--; + } + if (SUCCESSOR(s) == 0) + return NULL; + return CTX(SUCCESSOR(s)); +} + +static void UpdateModel(CPpmd8 *p) +{ + CPpmd_Void_Ref successor, fSuccessor = SUCCESSOR(p->FoundState); + CTX_PTR c; + unsigned s0, ns, fFreq = p->FoundState->Freq; + Byte flag, fSymbol = p->FoundState->Symbol; + CPpmd_State *s = NULL; + + if (p->FoundState->Freq < MAX_FREQ / 4 && p->MinContext->Suffix != 0) + { + c = SUFFIX(p->MinContext); 
+ + if (c->NumStats == 0) + { + s = ONE_STATE(c); + if (s->Freq < 32) + s->Freq++; + } + else + { + s = STATS(c); + if (s->Symbol != p->FoundState->Symbol) + { + do { s++; } while (s->Symbol != p->FoundState->Symbol); + if (s[0].Freq >= s[-1].Freq) + { + SwapStates(&s[0], &s[-1]); + s--; + } + } + if (s->Freq < MAX_FREQ - 9) + { + s->Freq += 2; + c->SummFreq += 2; + } + } + } + + c = p->MaxContext; + if (p->OrderFall == 0 && fSuccessor) + { + CTX_PTR cs = CreateSuccessors(p, True, s, p->MinContext); + if (cs == 0) + { + SetSuccessor(p->FoundState, 0); + RESTORE_MODEL(c, CTX(fSuccessor)); + } + else + { + SetSuccessor(p->FoundState, REF(cs)); + p->MaxContext = cs; + } + return; + } + + *p->Text++ = p->FoundState->Symbol; + successor = REF(p->Text); + if (p->Text >= p->UnitsStart) + { + RESTORE_MODEL(c, CTX(fSuccessor)); /* check it */ + return; + } + + if (!fSuccessor) + { + CTX_PTR cs = ReduceOrder(p, s, p->MinContext); + if (cs == NULL) + { + RESTORE_MODEL(c, 0); + return; + } + fSuccessor = REF(cs); + } + else if ((Byte *)Ppmd8_GetPtr(p, fSuccessor) < p->UnitsStart) + { + CTX_PTR cs = CreateSuccessors(p, False, s, p->MinContext); + if (cs == NULL) + { + RESTORE_MODEL(c, 0); + return; + } + fSuccessor = REF(cs); + } + + if (--p->OrderFall == 0) + { + successor = fSuccessor; + p->Text -= (p->MaxContext != p->MinContext); + } + #ifdef PPMD8_FREEZE_SUPPORT + else if (p->RestoreMethod > PPMD8_RESTORE_METHOD_FREEZE) + { + successor = fSuccessor; + RESET_TEXT(0); + p->OrderFall = 0; + } + #endif + + s0 = p->MinContext->SummFreq - (ns = p->MinContext->NumStats) - fFreq; + flag = (Byte)(0x08 * (fSymbol >= 0x40)); + + for (; c != p->MinContext; c = SUFFIX(c)) + { + unsigned ns1; + UInt32 cf, sf; + if ((ns1 = c->NumStats) != 0) + { + if ((ns1 & 1) != 0) + { + /* Expand for one UNIT */ + unsigned oldNU = (ns1 + 1) >> 1; + unsigned i = U2I(oldNU); + if (i != U2I(oldNU + 1)) + { + void *ptr = AllocUnits(p, i + 1); + void *oldPtr; + if (!ptr) + { + RESTORE_MODEL(c, 
CTX(fSuccessor)); + return; + } + oldPtr = STATS(c); + MyMem12Cpy(ptr, oldPtr, oldNU); + InsertNode(p, oldPtr, i); + c->Stats = STATS_REF(ptr); + } + } + c->SummFreq = (UInt16)(c->SummFreq + (3 * ns1 + 1 < ns)); + } + else + { + CPpmd_State *s2 = (CPpmd_State*)AllocUnits(p, 0); + if (!s2) + { + RESTORE_MODEL(c, CTX(fSuccessor)); + return; + } + *s2 = *ONE_STATE(c); + c->Stats = REF(s2); + if (s2->Freq < MAX_FREQ / 4 - 1) + s2->Freq <<= 1; + else + s2->Freq = MAX_FREQ - 4; + c->SummFreq = (UInt16)(s2->Freq + p->InitEsc + (ns > 2)); + } + cf = 2 * fFreq * (c->SummFreq + 6); + sf = (UInt32)s0 + c->SummFreq; + if (cf < 6 * sf) + { + cf = 1 + (cf > sf) + (cf >= 4 * sf); + c->SummFreq += 4; + } + else + { + cf = 4 + (cf > 9 * sf) + (cf > 12 * sf) + (cf > 15 * sf); + c->SummFreq = (UInt16)(c->SummFreq + cf); + } + { + CPpmd_State *s2 = STATS(c) + ns1 + 1; + SetSuccessor(s2, successor); + s2->Symbol = fSymbol; + s2->Freq = (Byte)cf; + c->Flags |= flag; + c->NumStats = (Byte)(ns1 + 1); + } + } + p->MaxContext = p->MinContext = CTX(fSuccessor); +} + +static void Rescale(CPpmd8 *p) +{ + unsigned i, adder, sumFreq, escFreq; + CPpmd_State *stats = STATS(p->MinContext); + CPpmd_State *s = p->FoundState; + { + CPpmd_State tmp = *s; + for (; s != stats; s--) + s[0] = s[-1]; + *s = tmp; + } + escFreq = p->MinContext->SummFreq - s->Freq; + s->Freq += 4; + adder = (p->OrderFall != 0 + #ifdef PPMD8_FREEZE_SUPPORT + || p->RestoreMethod > PPMD8_RESTORE_METHOD_FREEZE + #endif + ); + s->Freq = (Byte)((s->Freq + adder) >> 1); + sumFreq = s->Freq; + + i = p->MinContext->NumStats; + do + { + escFreq -= (++s)->Freq; + s->Freq = (Byte)((s->Freq + adder) >> 1); + sumFreq += s->Freq; + if (s[0].Freq > s[-1].Freq) + { + CPpmd_State *s1 = s; + CPpmd_State tmp = *s1; + do + s1[0] = s1[-1]; + while (--s1 != stats && tmp.Freq > s1[-1].Freq); + *s1 = tmp; + } + } + while (--i); + + if (s->Freq == 0) + { + unsigned numStats = p->MinContext->NumStats; + unsigned n0, n1; + do { i++; } while ((--s)->Freq 
== 0); + escFreq += i; + p->MinContext->NumStats = (Byte)(p->MinContext->NumStats - i); + if (p->MinContext->NumStats == 0) + { + CPpmd_State tmp = *stats; + tmp.Freq = (Byte)((2 * tmp.Freq + escFreq - 1) / escFreq); + if (tmp.Freq > MAX_FREQ / 3) + tmp.Freq = MAX_FREQ / 3; + InsertNode(p, stats, U2I((numStats + 2) >> 1)); + p->MinContext->Flags = (Byte)((p->MinContext->Flags & 0x10) + 0x08 * (tmp.Symbol >= 0x40)); + *(p->FoundState = ONE_STATE(p->MinContext)) = tmp; + return; + } + n0 = (numStats + 2) >> 1; + n1 = (p->MinContext->NumStats + 2) >> 1; + if (n0 != n1) + p->MinContext->Stats = STATS_REF(ShrinkUnits(p, stats, n0, n1)); + p->MinContext->Flags &= ~0x08; + p->MinContext->Flags |= 0x08 * ((s = STATS(p->MinContext))->Symbol >= 0x40); + i = p->MinContext->NumStats; + do { p->MinContext->Flags |= 0x08*((++s)->Symbol >= 0x40); } while (--i); + } + p->MinContext->SummFreq = (UInt16)(sumFreq + escFreq - (escFreq >> 1)); + p->MinContext->Flags |= 0x4; + p->FoundState = STATS(p->MinContext); +} + +CPpmd_See *Ppmd8_MakeEscFreq(CPpmd8 *p, unsigned numMasked1, UInt32 *escFreq) +{ + CPpmd_See *see; + if (p->MinContext->NumStats != 0xFF) + { + see = p->See[(unsigned)p->NS2Indx[(unsigned)p->MinContext->NumStats + 2] - 3] + + (p->MinContext->SummFreq > 11 * ((unsigned)p->MinContext->NumStats + 1)) + + 2 * (unsigned)(2 * (unsigned)p->MinContext->NumStats < + ((unsigned)SUFFIX(p->MinContext)->NumStats + numMasked1)) + + p->MinContext->Flags; + { + unsigned r = (see->Summ >> see->Shift); + see->Summ = (UInt16)(see->Summ - r); + *escFreq = r + (r == 0); + } + } + else + { + see = &p->DummySee; + *escFreq = 1; + } + return see; +} + +static void NextContext(CPpmd8 *p) +{ + CTX_PTR c = CTX(SUCCESSOR(p->FoundState)); + if (p->OrderFall == 0 && (Byte *)c >= p->UnitsStart) + p->MinContext = p->MaxContext = c; + else + { + UpdateModel(p); + p->MinContext = p->MaxContext; + } +} + +void Ppmd8_Update1(CPpmd8 *p) +{ + CPpmd_State *s = p->FoundState; + s->Freq += 4; + 
p->MinContext->SummFreq += 4; + if (s[0].Freq > s[-1].Freq) + { + SwapStates(&s[0], &s[-1]); + p->FoundState = --s; + if (s->Freq > MAX_FREQ) + Rescale(p); + } + NextContext(p); +} + +void Ppmd8_Update1_0(CPpmd8 *p) +{ + p->PrevSuccess = (2 * p->FoundState->Freq >= p->MinContext->SummFreq); + p->RunLength += p->PrevSuccess; + p->MinContext->SummFreq += 4; + if ((p->FoundState->Freq += 4) > MAX_FREQ) + Rescale(p); + NextContext(p); +} + +void Ppmd8_UpdateBin(CPpmd8 *p) +{ + p->FoundState->Freq = (Byte)(p->FoundState->Freq + (p->FoundState->Freq < 196)); + p->PrevSuccess = 1; + p->RunLength++; + NextContext(p); +} + +void Ppmd8_Update2(CPpmd8 *p) +{ + p->MinContext->SummFreq += 4; + if ((p->FoundState->Freq += 4) > MAX_FREQ) + Rescale(p); + p->RunLength = p->InitRL; + UpdateModel(p); + p->MinContext = p->MaxContext; +} + +/* Ppmd8Dec.c -- PPMdI Decoder +2010-04-16 : Igor Pavlov : Public domain +This code is based on: + PPMd var.I (2002): Dmitry Shkarin : Public domain + Carryless rangecoder (1999): Dmitry Subbotin : Public domain */ + +Bool Ppmd8_RangeDec_Init(CPpmd8 *p) +{ + unsigned i; + p->Low = 0; + p->Range = 0xFFFFFFFF; + p->Code = 0; + for (i = 0; i < 4; i++) + p->Code = (p->Code << 8) | p->Stream.In->Read(p->Stream.In); + return (p->Code < 0xFFFFFFFF); +} + +static UInt32 RangeDec_GetThreshold(CPpmd8 *p, UInt32 total) +{ + return p->Code / (p->Range /= total); +} + +static void RangeDec_Decode(CPpmd8 *p, UInt32 start, UInt32 size) +{ + start *= p->Range; + p->Low += start; + p->Code -= start; + p->Range *= size; + + while ((p->Low ^ (p->Low + p->Range)) < kTop || + (p->Range < kBot && ((p->Range = (0 - p->Low) & (kBot - 1)), 1))) + { + p->Code = (p->Code << 8) | p->Stream.In->Read(p->Stream.In); + p->Range <<= 8; + p->Low <<= 8; + } +} + +#define MASK(sym) ((signed char *)charMask)[sym] + +int Ppmd8_DecodeSymbol(CPpmd8 *p) +{ + size_t charMask[256 / sizeof(size_t)]; + if (p->MinContext->NumStats != 0) + { + CPpmd_State *s = Ppmd8_GetStats(p, p->MinContext); + 
unsigned i; + UInt32 count, hiCnt; + if ((count = RangeDec_GetThreshold(p, p->MinContext->SummFreq)) < (hiCnt = s->Freq)) + { + Byte symbol; + RangeDec_Decode(p, 0, s->Freq); + p->FoundState = s; + symbol = s->Symbol; + Ppmd8_Update1_0(p); + return symbol; + } + p->PrevSuccess = 0; + i = p->MinContext->NumStats; + do + { + if ((hiCnt += (++s)->Freq) > count) + { + Byte symbol; + RangeDec_Decode(p, hiCnt - s->Freq, s->Freq); + p->FoundState = s; + symbol = s->Symbol; + Ppmd8_Update1(p); + return symbol; + } + } + while (--i); + if (count >= p->MinContext->SummFreq) + return -2; + RangeDec_Decode(p, hiCnt, p->MinContext->SummFreq - hiCnt); + PPMD_SetAllBitsIn256Bytes(charMask); + MASK(s->Symbol) = 0; + i = p->MinContext->NumStats; + do { MASK((--s)->Symbol) = 0; } while (--i); + } + else + { + UInt16 *prob = Ppmd8_GetBinSumm(p); + if (((p->Code / (p->Range >>= 14)) < *prob)) + { + Byte symbol; + RangeDec_Decode(p, 0, *prob); + *prob = (UInt16)PPMD_UPDATE_PROB_0(*prob); + symbol = (p->FoundState = Ppmd8Context_OneState(p->MinContext))->Symbol; + Ppmd8_UpdateBin(p); + return symbol; + } + RangeDec_Decode(p, *prob, (1 << 14) - *prob); + *prob = (UInt16)PPMD_UPDATE_PROB_1(*prob); + p->InitEsc = PPMD8_kExpEscape[*prob >> 10]; + PPMD_SetAllBitsIn256Bytes(charMask); + MASK(Ppmd8Context_OneState(p->MinContext)->Symbol) = 0; + p->PrevSuccess = 0; + } + for (;;) + { + CPpmd_State *ps[256], *s; + UInt32 freqSum, count, hiCnt; + CPpmd_See *see; + unsigned i, num, numMasked = p->MinContext->NumStats; + do + { + p->OrderFall++; + if (!p->MinContext->Suffix) + return -1; + p->MinContext = Ppmd8_GetContext(p, p->MinContext->Suffix); + } + while (p->MinContext->NumStats == numMasked); + hiCnt = 0; + s = Ppmd8_GetStats(p, p->MinContext); + i = 0; + num = p->MinContext->NumStats - numMasked; + do + { + int k = (int)(MASK(s->Symbol)); + hiCnt += (s->Freq & k); + ps[i] = s++; + i -= k; + } + while (i != num); + + see = Ppmd8_MakeEscFreq(p, numMasked, &freqSum); + freqSum += hiCnt; + 
count = RangeDec_GetThreshold(p, freqSum); + + if (count < hiCnt) + { + Byte symbol; + CPpmd_State **pps = ps; + for (hiCnt = 0; (hiCnt += (*pps)->Freq) <= count; pps++); + s = *pps; + RangeDec_Decode(p, hiCnt - s->Freq, s->Freq); + Ppmd_See_Update(see); + p->FoundState = s; + symbol = s->Symbol; + Ppmd8_Update2(p); + return symbol; + } + if (count >= freqSum) + return -2; + RangeDec_Decode(p, hiCnt, freqSum - hiCnt); + see->Summ = (UInt16)(see->Summ + freqSum); + do { MASK(ps[--i]->Symbol) = 0; } while (i != 0); + } +} + +/* H->I changes: + NS2Indx + GlewCount, and Glue method + BinSum + See / EscFreq + CreateSuccessors updates more suffix contexts + UpdateModel consts. + PrevSuccess Update +*/ + +const IPpmd8 __archive_ppmd8_functions = +{ + &Ppmd8_Construct, + &Ppmd8_Alloc, + &Ppmd8_Free, + &Ppmd8_Init, + &Ppmd8_RangeDec_Init, + &Ppmd8_DecodeSymbol, +}; diff --git a/libarchive/archive_ppmd8_private.h b/libarchive/archive_ppmd8_private.h new file mode 100644 index 00000000000..534927860eb --- /dev/null +++ b/libarchive/archive_ppmd8_private.h @@ -0,0 +1,148 @@ +/* Ppmd8.h -- PPMdI codec +2011-01-27 : Igor Pavlov : Public domain +This code is based on: + PPMd var.I (2002): Dmitry Shkarin : Public domain + Carryless rangecoder (1999): Dmitry Subbotin : Public domain */ + +#ifndef __PPMD8_H +#define __PPMD8_H + +#include "archive_ppmd_private.h" + +#define PPMD8_MIN_ORDER 2 +#define PPMD8_MAX_ORDER 16 + +struct CPpmd8_Context_; + +typedef + #ifdef PPMD_32BIT + struct CPpmd8_Context_ * + #else + UInt32 + #endif + CPpmd8_Context_Ref; + +#pragma pack(push, 1) + +typedef struct CPpmd8_Context_ +{ + Byte NumStats; + Byte Flags; + UInt16 SummFreq; + CPpmd_State_Ref Stats; + CPpmd8_Context_Ref Suffix; +} CPpmd8_Context; + +#pragma pack(pop) + +#define Ppmd8Context_OneState(p) ((CPpmd_State *)&(p)->SummFreq) + +/* The BUG in Shkarin's code for FREEZE mode was fixed, but that fixed + code is not compatible with original code for some files compressed + in FREEZE mode. 
So we disable FREEZE mode support. */ + +enum +{ + PPMD8_RESTORE_METHOD_RESTART, + PPMD8_RESTORE_METHOD_CUT_OFF + #ifdef PPMD8_FREEZE_SUPPORT + , PPMD8_RESTORE_METHOD_FREEZE + #endif +}; + +typedef struct +{ + CPpmd8_Context *MinContext, *MaxContext; + CPpmd_State *FoundState; + unsigned OrderFall, InitEsc, PrevSuccess, MaxOrder; + Int32 RunLength, InitRL; /* must be 32-bit at least */ + + UInt32 Size; + UInt32 GlueCount; + Byte *Base, *LoUnit, *HiUnit, *Text, *UnitsStart; + UInt32 AlignOffset; + unsigned RestoreMethod; + + /* Range Coder */ + UInt32 Range; + UInt32 Code; + UInt32 Low; + union + { + IByteIn *In; + IByteOut *Out; + } Stream; + + Byte Indx2Units[PPMD_NUM_INDEXES]; + Byte Units2Indx[128]; + CPpmd_Void_Ref FreeList[PPMD_NUM_INDEXES]; + UInt32 Stamps[PPMD_NUM_INDEXES]; + + Byte NS2BSIndx[256], NS2Indx[260]; + CPpmd_See DummySee, See[24][32]; + UInt16 BinSumm[25][64]; +} CPpmd8; + +void Ppmd8_Construct(CPpmd8 *p); +Bool Ppmd8_Alloc(CPpmd8 *p, UInt32 size); +void Ppmd8_Free(CPpmd8 *p); +void Ppmd8_Init(CPpmd8 *p, unsigned maxOrder, unsigned restoreMethod); +#define Ppmd8_WasAllocated(p) ((p)->Base != NULL) + + +/* ---------- Internal Functions ---------- */ + +extern const Byte PPMD8_kExpEscape[16]; + +#ifdef PPMD_32BIT + #define Ppmd8_GetPtr(p, ptr) (ptr) + #define Ppmd8_GetContext(p, ptr) (ptr) + #define Ppmd8_GetStats(p, ctx) ((ctx)->Stats) +#else + #define Ppmd8_GetPtr(p, offs) ((void *)((p)->Base + (offs))) + #define Ppmd8_GetContext(p, offs) ((CPpmd8_Context *)Ppmd8_GetPtr((p), (offs))) + #define Ppmd8_GetStats(p, ctx) ((CPpmd_State *)Ppmd8_GetPtr((p), ((ctx)->Stats))) +#endif + +void Ppmd8_Update1(CPpmd8 *p); +void Ppmd8_Update1_0(CPpmd8 *p); +void Ppmd8_Update2(CPpmd8 *p); +void Ppmd8_UpdateBin(CPpmd8 *p); + +#define Ppmd8_GetBinSumm(p) \ + &p->BinSumm[p->NS2Indx[Ppmd8Context_OneState(p->MinContext)->Freq - 1]][ \ + p->NS2BSIndx[Ppmd8_GetContext(p, p->MinContext->Suffix)->NumStats] + \ + p->PrevSuccess + p->MinContext->Flags + ((p->RunLength >> 
26) & 0x20)] + +CPpmd_See *Ppmd8_MakeEscFreq(CPpmd8 *p, unsigned numMasked, UInt32 *scale); + + +/* ---------- Decode ---------- */ + +Bool Ppmd8_RangeDec_Init(CPpmd8 *p); +#define Ppmd8_RangeDec_IsFinishedOK(p) ((p)->Code == 0) +int Ppmd8_DecodeSymbol(CPpmd8 *p); /* returns: -1 as EndMarker, -2 as DataError */ + +/* ---------- Encode ---------- */ + +#define Ppmd8_RangeEnc_Init(p) { (p)->Low = 0; (p)->Range = 0xFFFFFFFF; } +void Ppmd8_RangeEnc_FlushData(CPpmd8 *p); +void Ppmd8_EncodeSymbol(CPpmd8 *p, int symbol); /* symbol = -1 means EndMarker */ + +typedef struct +{ + /* Base Functions */ + void (*Ppmd8_Construct)(CPpmd8 *p); + Bool (*Ppmd8_Alloc)(CPpmd8 *p, UInt32 size); + void (*Ppmd8_Free)(CPpmd8 *p); + void (*Ppmd8_Init)(CPpmd8 *p, unsigned max_order, unsigned restore_method); + #define Ppmd7_WasAllocated(p) ((p)->Base != NULL) + + /* Decode Functions */ + int (*Ppmd8_RangeDec_Init)(CPpmd8 *p); + int (*Ppmd8_DecodeSymbol)(CPpmd8 *p); +} IPpmd8; + +extern const IPpmd8 __archive_ppmd8_functions; + +#endif diff --git a/libarchive/archive_read_disk_posix.c b/libarchive/archive_read_disk_posix.c index cdf7541238c..09c366f5feb 100644 --- a/libarchive/archive_read_disk_posix.c +++ b/libarchive/archive_read_disk_posix.c @@ -856,7 +856,11 @@ next_entry(struct archive_read_disk *a, struct tree *t, const struct stat *st; /* info to use for this entry */ const struct stat *lst;/* lstat() information */ const char *name; - int descend, r; + int delayed, delayed_errno, descend, r; + struct archive_string delayed_str; + + delayed = ARCHIVE_OK; + archive_string_init(&delayed_str); st = NULL; lst = NULL; @@ -885,11 +889,23 @@ next_entry(struct archive_read_disk *a, struct tree *t, case TREE_REGULAR: lst = tree_current_lstat(t); if (lst == NULL) { + if (errno == ENOENT && t->depth > 0) { + delayed = ARCHIVE_WARN; + delayed_errno = errno; + if (delayed_str.length == 0) { + archive_string_sprintf(&delayed_str, + "%s", tree_current_path(t)); + } else { + 
archive_string_sprintf(&delayed_str, + " %s", tree_current_path(t)); + } + } else { archive_set_error(&a->archive, errno, "%s: Cannot stat", tree_current_path(t)); tree_enter_initial_dir(t); return (ARCHIVE_FAILED); + } } break; } @@ -1083,6 +1099,18 @@ next_entry(struct archive_read_disk *a, struct tree *t, r = archive_read_disk_entry_from_file(&(a->archive), entry, t->entry_fd, st); + if (r == ARCHIVE_OK) { + r = delayed; + if (r != ARCHIVE_OK) { + archive_string_sprintf(&delayed_str, ": %s", + "File removed before we read it"); + archive_set_error(&(a->archive), delayed_errno, + "%s", delayed_str.s); + } + } + if (!archive_string_empty(&delayed_str)) + archive_string_free(&delayed_str); + return (r); } diff --git a/libarchive/archive_read_open_file.c b/libarchive/archive_read_open_file.c index bfe933bf32e..101dae6cd9e 100644 --- a/libarchive/archive_read_open_file.c +++ b/libarchive/archive_read_open_file.c @@ -174,8 +174,7 @@ file_close(struct archive *a, void *client_data) struct read_FILE_data *mine = (struct read_FILE_data *)client_data; (void)a; /* UNUSED */ - if (mine->buffer != NULL) - free(mine->buffer); + free(mine->buffer); free(mine); return (ARCHIVE_OK); } diff --git a/libarchive/archive_read_support_format_7zip.c b/libarchive/archive_read_support_format_7zip.c index bccbf896603..8ca422ec066 100644 --- a/libarchive/archive_read_support_format_7zip.c +++ b/libarchive/archive_read_support_format_7zip.c @@ -2964,13 +2964,7 @@ get_uncompressed_data(struct archive_read *a, const void **buff, size_t size, if (zip->codec == _7Z_COPY && zip->codec2 == (unsigned long)-1) { /* Copy mode. */ - /* - * Note: '1' here is a performance optimization. - * Recall that the decompression layer returns a count of - * available bytes; asking for more than that forces the - * decompressor to combine reads by copying data. 
- */ - *buff = __archive_read_ahead(a, 1, &bytes_avail); + *buff = __archive_read_ahead(a, minimum, &bytes_avail); if (bytes_avail <= 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, @@ -3323,8 +3317,7 @@ setup_decode_folder(struct archive_read *a, struct _7z_folder *folder, * Release the memory which the previous folder used for BCJ2. */ for (i = 0; i < 3; i++) { - if (zip->sub_stream_buff[i] != NULL) - free(zip->sub_stream_buff[i]); + free(zip->sub_stream_buff[i]); zip->sub_stream_buff[i] = NULL; } diff --git a/libarchive/archive_read_support_format_ar.c b/libarchive/archive_read_support_format_ar.c index 1b0205cc705..296b7db0411 100644 --- a/libarchive/archive_read_support_format_ar.c +++ b/libarchive/archive_read_support_format_ar.c @@ -138,8 +138,7 @@ archive_read_format_ar_cleanup(struct archive_read *a) struct ar *ar; ar = (struct ar *)(a->format->data); - if (ar->strtab) - free(ar->strtab); + free(ar->strtab); free(ar); (a->format->data) = NULL; return (ARCHIVE_OK); @@ -388,9 +387,10 @@ _ar_read_header(struct archive_read *a, struct archive_entry *entry, /* * "/" is the SVR4/GNU archive symbol table. + * "/SYM64/" is the SVR4/GNU 64-bit variant archive symbol table. */ - if (strcmp(filename, "/") == 0) { - archive_entry_copy_pathname(entry, "/"); + if (strcmp(filename, "/") == 0 || strcmp(filename, "/SYM64/") == 0) { + archive_entry_copy_pathname(entry, filename); /* Parse the time, owner, mode, size fields. */ r = ar_parse_common_header(ar, entry, h); /* Force the file type to a regular file. 
*/ diff --git a/libarchive/archive_read_support_format_cpio.c b/libarchive/archive_read_support_format_cpio.c index 67d5b21eebb..1c96e6ac195 100644 --- a/libarchive/archive_read_support_format_cpio.c +++ b/libarchive/archive_read_support_format_cpio.c @@ -955,8 +955,7 @@ archive_read_format_cpio_cleanup(struct archive_read *a) while (cpio->links_head != NULL) { struct links_entry *lp = cpio->links_head->next; - if (cpio->links_head->name) - free(cpio->links_head->name); + free(cpio->links_head->name); free(cpio->links_head); cpio->links_head = lp; } diff --git a/libarchive/archive_read_support_format_iso9660.c b/libarchive/archive_read_support_format_iso9660.c index 28acfefbba8..db14d41dff4 100644 --- a/libarchive/archive_read_support_format_iso9660.c +++ b/libarchive/archive_read_support_format_iso9660.c @@ -1724,8 +1724,7 @@ archive_read_format_iso9660_cleanup(struct archive_read *a) free(iso9660->read_ce_req.reqs); archive_string_free(&iso9660->pathname); archive_string_free(&iso9660->previous_pathname); - if (iso9660->pending_files.files) - free(iso9660->pending_files.files); + free(iso9660->pending_files.files); #ifdef HAVE_ZLIB_H free(iso9660->entry_zisofs.uncompressed_buffer); free(iso9660->entry_zisofs.block_pointers); @@ -2102,6 +2101,7 @@ parse_rockridge(struct archive_read *a, struct file_info *file, const unsigned char *p, const unsigned char *end) { struct iso9660 *iso9660; + int entry_seen = 0; iso9660 = (struct iso9660 *)(a->format->data); @@ -2257,8 +2257,16 @@ parse_rockridge(struct archive_read *a, struct file_info *file, } p += p[2]; + entry_seen = 1; + } + + if (entry_seen) + return (ARCHIVE_OK); + else { + archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, + "Tried to parse Rockridge extensions, but none found"); + return (ARCHIVE_WARN); } - return (ARCHIVE_OK); } static int @@ -3029,8 +3037,7 @@ heap_add_entry(struct archive_read *a, struct heap_queue *heap, if (heap->allocated) memcpy(new_pending_files, heap->files, heap->allocated * 
sizeof(new_pending_files[0])); - if (heap->files != NULL) - free(heap->files); + free(heap->files); heap->files = new_pending_files; heap->allocated = new_size; } diff --git a/libarchive/archive_read_support_format_rar5.c b/libarchive/archive_read_support_format_rar5.c index 9314f7a9f9c..22462a6e18d 100644 --- a/libarchive/archive_read_support_format_rar5.c +++ b/libarchive/archive_read_support_format_rar5.c @@ -588,8 +588,7 @@ static int run_filter(struct archive_read* a, struct filter_info* flt) { int ret; struct rar5* rar = get_context(a); - if(rar->cstate.filtered_buf) - free(rar->cstate.filtered_buf); + free(rar->cstate.filtered_buf); rar->cstate.filtered_buf = malloc(flt->block_length); if(!rar->cstate.filtered_buf) { @@ -772,7 +771,7 @@ static void free_filters(struct rar5* rar) { struct filter_info* f = NULL; /* Pop_front will also decrease the collection's size. */ - if(CDE_OK == cdeque_pop_front(d, cdeque_filter_p(&f)) && f != NULL) + if (CDE_OK == cdeque_pop_front(d, cdeque_filter_p(&f))) free(f); } @@ -873,7 +872,7 @@ static int read_var(struct archive_read* a, uint64_t* pvalue, /* Strip the MSB from the input byte and add the resulting number * to the `result`. */ - result += (b & 0x7F) << shift; + result += (b & (uint64_t)0x7F) << shift; /* MSB set to 1 means we need to continue decoding process. MSB set * to 0 means we're done. @@ -1301,7 +1300,7 @@ static int process_head_file(struct archive_read* a, struct rar5* rar, char name_utf8_buf[2048 * 4]; const uint8_t* p; - memset(entry, 0, sizeof(struct archive_entry)); + archive_entry_clear(entry); /* Do not reset file context if we're switching archives. 
*/ if(!rar->cstate.switch_multivolume) { @@ -1795,8 +1794,14 @@ static int skip_base_block(struct archive_read* a) { int ret; struct rar5* rar = get_context(a); - struct archive_entry entry; - ret = process_base_block(a, &entry); + /* Create a new local archive_entry structure that will be operated on + * by header reader; operations on this archive_entry will be discarded. + */ + struct archive_entry* entry = archive_entry_new(); + ret = process_base_block(a, entry); + + /* Discard operations on this archive_entry structure. */ + archive_entry_free(entry); if(rar->generic.last_header_id == 2 && rar->generic.split_before > 0) return ARCHIVE_OK; @@ -1836,13 +1841,14 @@ static int rar5_read_header(struct archive_read *a, static void init_unpack(struct rar5* rar) { rar->file.calculated_crc32 = 0; - rar->cstate.window_mask = rar->cstate.window_size - 1; + if (rar->cstate.window_size) + rar->cstate.window_mask = rar->cstate.window_size - 1; + else + rar->cstate.window_mask = 0; - if(rar->cstate.window_buf) - free(rar->cstate.window_buf); + free(rar->cstate.window_buf); - if(rar->cstate.filtered_buf) - free(rar->cstate.filtered_buf); + free(rar->cstate.filtered_buf); rar->cstate.window_buf = calloc(1, rar->cstate.window_size); rar->cstate.filtered_buf = calloc(1, rar->cstate.window_size); @@ -2676,13 +2682,21 @@ static int merge_block(struct archive_read* a, ssize_t block_size, if(rar->vol.push_buf) free((void*) rar->vol.push_buf); - rar->vol.push_buf = malloc(block_size); + /* Increasing the allocation block by 8 is due to bit reading functions, + * which are using additional 2 or 4 bytes. Allocating the block size + * by exact value would make bit reader perform reads from invalid memory + * block when reading the last byte from the buffer. 
*/ + rar->vol.push_buf = malloc(block_size + 8); if(!rar->vol.push_buf) { archive_set_error(&a->archive, ENOMEM, "Can't allocate memory for a " "merge block buffer."); return ARCHIVE_FATAL; } + /* Valgrind complains if the extension block for bit reader is not + * initialized, so initialize it. */ + memset(&rar->vol.push_buf[block_size], 0, 8); + /* A single block can span across multiple multivolume archive files, * so we use a loop here. This loop will consume enough multivolume * archive files until the whole block is read. */ @@ -3394,14 +3408,11 @@ static int64_t rar5_seek_data(struct archive_read *a, int64_t offset, static int rar5_cleanup(struct archive_read *a) { struct rar5* rar = get_context(a); - if(rar->cstate.window_buf) - free(rar->cstate.window_buf); + free(rar->cstate.window_buf); - if(rar->cstate.filtered_buf) - free(rar->cstate.filtered_buf); + free(rar->cstate.filtered_buf); - if(rar->vol.push_buf) - free(rar->vol.push_buf); + free(rar->vol.push_buf); free_filters(rar); cdeque_free(&rar->cstate.filters); diff --git a/libarchive/archive_read_support_format_xar.c b/libarchive/archive_read_support_format_xar.c index c4dd915a2c0..6ff9cc4be5f 100644 --- a/libarchive/archive_read_support_format_xar.c +++ b/libarchive/archive_read_support_format_xar.c @@ -1229,8 +1229,7 @@ heap_add_entry(struct archive_read *a, } memcpy(new_pending_files, heap->files, heap->allocated * sizeof(new_pending_files[0])); - if (heap->files != NULL) - free(heap->files); + free(heap->files); heap->files = new_pending_files; heap->allocated = new_size; } diff --git a/libarchive/archive_read_support_format_zip.c b/libarchive/archive_read_support_format_zip.c index 420004dbabb..737a25eb1d0 100644 --- a/libarchive/archive_read_support_format_zip.c +++ b/libarchive/archive_read_support_format_zip.c @@ -52,6 +52,12 @@ __FBSDID("$FreeBSD: head/lib/libarchive/archive_read_support_format_zip.c 201102 #ifdef HAVE_ZLIB_H #include #endif +#ifdef HAVE_BZLIB_H +#include +#endif +#ifdef 
HAVE_LZMA_H +#include +#endif #include "archive.h" #include "archive_digest_private.h" @@ -63,6 +69,7 @@ __FBSDID("$FreeBSD: head/lib/libarchive/archive_read_support_format_zip.c 201102 #include "archive_private.h" #include "archive_rb.h" #include "archive_read_private.h" +#include "archive_ppmd8_private.h" #ifndef HAVE_ZLIB_H #include "archive_crc32.h" @@ -165,13 +172,29 @@ struct zip { char decompress_init; char end_of_entry; -#ifdef HAVE_ZLIB_H unsigned char *uncompressed_buffer; size_t uncompressed_buffer_size; + +#ifdef HAVE_ZLIB_H z_stream stream; char stream_valid; #endif +#if HAVE_LZMA_H && HAVE_LIBLZMA + lzma_stream zipx_lzma_stream; + char zipx_lzma_valid; +#endif + +#ifdef HAVE_BZLIB_H + bz_stream bzstream; + char bzstream_valid; +#endif + + IByteIn zipx_ppmd_stream; + ssize_t zipx_ppmd_read_compressed; + CPpmd8 ppmd8; + char ppmd8_valid; + struct archive_string_conv *sconv; struct archive_string_conv *sconv_default; struct archive_string_conv *sconv_utf8; @@ -222,6 +245,27 @@ struct zip { /* Many systems define min or MIN, but not all. */ #define zipmin(a,b) ((a) < (b) ? (a) : (b)) +/* This function is used by Ppmd8_DecodeSymbol during decompression of Ppmd8 + * streams inside ZIP files. It has 2 purposes: one is to fetch the next + * compressed byte from the stream, second one is to increase the counter how + * many compressed bytes were read. */ +static Byte +ppmd_read(void* p) { + /* Get the handle to current decompression context. */ + struct archive_read *a = ((IByteIn*)p)->a; + struct zip *zip = (struct zip*) a->format->data; + + /* Fetch next byte. */ + const uint8_t* data = __archive_read_ahead(a, 1, NULL); + __archive_read_consume(a, 1); + + /* Increment the counter. */ + ++zip->zipx_ppmd_read_compressed; + + /* Return the next compressed byte. 
*/ + return data[0]; +} + /* ------------------------------------------------------------------------ */ /* @@ -372,6 +416,8 @@ static const struct { {17, "reserved"}, /* Reserved by PKWARE */ {18, "ibm-terse-new"}, /* File is compressed using IBM TERSE (new) */ {19, "ibm-lz777"},/* IBM LZ77 z Architecture (PFS) */ + {95, "xz"}, /* XZ compressed data */ + {96, "jpeg"}, /* JPEG compressed data */ {97, "wav-pack"}, /* WavPack compressed data */ {98, "ppmd-1"}, /* PPMd version I, Rev 1 */ {99, "aes"} /* WinZip AES encryption */ @@ -1296,6 +1342,695 @@ zip_read_data_none(struct archive_read *a, const void **_buff, return (ARCHIVE_OK); } +static int +consume_optional_marker(struct archive_read *a, struct zip *zip) +{ + if (zip->end_of_entry && (zip->entry->zip_flags & ZIP_LENGTH_AT_END)) { + const char *p; + + if (NULL == (p = __archive_read_ahead(a, 24, NULL))) { + archive_set_error(&a->archive, + ARCHIVE_ERRNO_FILE_FORMAT, + "Truncated ZIP end-of-file record"); + return (ARCHIVE_FATAL); + } + /* Consume the optional PK\007\010 marker. 
*/ + if (p[0] == 'P' && p[1] == 'K' && + p[2] == '\007' && p[3] == '\010') { + p += 4; + zip->unconsumed = 4; + } + if (zip->entry->flags & LA_USED_ZIP64) { + uint64_t compressed, uncompressed; + zip->entry->crc32 = archive_le32dec(p); + compressed = archive_le64dec(p + 4); + uncompressed = archive_le64dec(p + 12); + if (compressed > INT64_MAX || uncompressed > INT64_MAX) { + archive_set_error(&a->archive, + ARCHIVE_ERRNO_FILE_FORMAT, + "Overflow of 64-bit file sizes"); + return ARCHIVE_FAILED; + } + zip->entry->compressed_size = compressed; + zip->entry->uncompressed_size = uncompressed; + zip->unconsumed += 20; + } else { + zip->entry->crc32 = archive_le32dec(p); + zip->entry->compressed_size = archive_le32dec(p + 4); + zip->entry->uncompressed_size = archive_le32dec(p + 8); + zip->unconsumed += 12; + } + } + + return (ARCHIVE_OK); +} + +#if HAVE_LZMA_H && HAVE_LIBLZMA +static int +zipx_xz_init(struct archive_read *a, struct zip *zip) +{ + lzma_ret r; + + if(zip->zipx_lzma_valid) { + lzma_end(&zip->zipx_lzma_stream); + zip->zipx_lzma_valid = 0; + } + + memset(&zip->zipx_lzma_stream, 0, sizeof(zip->zipx_lzma_stream)); + r = lzma_stream_decoder(&zip->zipx_lzma_stream, UINT64_MAX, 0); + if (r != LZMA_OK) { + archive_set_error(&(a->archive), ARCHIVE_ERRNO_MISC, + "xz initialization failed(%d)", + r); + + return (ARCHIVE_FAILED); + } + + zip->zipx_lzma_valid = 1; + + free(zip->uncompressed_buffer); + + zip->uncompressed_buffer_size = 256 * 1024; + zip->uncompressed_buffer = + (uint8_t*) malloc(zip->uncompressed_buffer_size); + if (zip->uncompressed_buffer == NULL) { + archive_set_error(&a->archive, ENOMEM, + "No memory for xz decompression"); + return (ARCHIVE_FATAL); + } + + zip->decompress_init = 1; + return (ARCHIVE_OK); +} + +static int +zipx_lzma_alone_init(struct archive_read *a, struct zip *zip) +{ + lzma_ret r; + const uint8_t* p; + +#pragma pack(push) +#pragma pack(1) + struct _alone_header { + uint8_t bytes[5]; + uint64_t uncompressed_size; + } alone_header; 
+#pragma pack(pop) + + /* To unpack ZIPX's "LZMA" (id 14) stream we can use standard liblzma that + * is a part of XZ Utils. The stream format stored inside ZIPX file is a + * modified "lzma alone" file format, that was used by the `lzma` utility + * which was later deprecated in favour of `xz` utility. Since those + * formats are nearly the same, we can use a standard "lzma alone" decoder + * from XZ Utils. */ + + memset(&zip->zipx_lzma_stream, 0, sizeof(zip->zipx_lzma_stream)); + r = lzma_alone_decoder(&zip->zipx_lzma_stream, UINT64_MAX); + if (r != LZMA_OK) { + archive_set_error(&(a->archive), ARCHIVE_ERRNO_MISC, + "lzma initialization failed(%d)", r); + + return (ARCHIVE_FAILED); + } + + /* Flag the cleanup function that we want our lzma-related structures + * to be freed later. */ + zip->zipx_lzma_valid = 1; + + /* The "lzma alone" file format and the stream format inside ZIPx are + * almost the same. Here's an example of a structure of "lzma alone" + * format: + * + * $ cat /bin/ls | lzma | xxd | head -n 1 + * 00000000: 5d00 0080 00ff ffff ffff ffff ff00 2814 + * + * 5 bytes 8 bytes n bytes + * + * + * lzma_params is a 5-byte blob that has to be decoded to extract + * parameters of this LZMA stream. The uncompressed_size field is an + * uint64_t value that contains information about the size of the + * uncompressed file, or UINT64_MAX if this value is unknown. The + * part is the actual lzma-compressed data stream. + * + * Now here's the structure of the stream inside the ZIPX file: + * + * $ cat stream_inside_zipx | xxd | head -n 1 + * 00000000: 0914 0500 5d00 8000 0000 2814 .... .... + * + * 2byte 2byte 5 bytes n bytes + * + * + * This means that the ZIPX file contains an additional magic1 and magic2 + * headers, the lzma_params field contains the same parameter set as in the + * "lzma alone" format, and the field is the same as in the "lzma + * alone" format as well. Note that also the zipx format is missing the + * uncompressed_size field. 
+ * + * So, in order to use the "lzma alone" decoder for the zipx lzma stream, + * we simply need to shuffle around some fields, prepare a new lzma alone + * header, feed it into lzma alone decoder so it will initialize itself + * properly, and then we can start feeding normal zipx lzma stream into the + * decoder. + */ + + /* Read magic1,magic2,lzma_params from the ZIPX stream. */ + if((p = __archive_read_ahead(a, 9, NULL)) == NULL) { + archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, + "Truncated lzma data"); + return (ARCHIVE_FATAL); + } + + if(p[2] != 0x05 || p[3] != 0x00) { + archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, + "Invalid lzma data"); + return (ARCHIVE_FATAL); + } + + /* Prepare an lzma alone header: copy the lzma_params blob into a proper + * place into the lzma alone header. */ + memcpy(&alone_header.bytes[0], p + 4, 5); + + /* Initialize the 'uncompressed size' field to unknown; we'll manually + * monitor how many bytes there are still to be uncompressed. */ + alone_header.uncompressed_size = UINT64_MAX; + + if(!zip->uncompressed_buffer) { + zip->uncompressed_buffer_size = 256 * 1024; + zip->uncompressed_buffer = + (uint8_t*) malloc(zip->uncompressed_buffer_size); + + if (zip->uncompressed_buffer == NULL) { + archive_set_error(&a->archive, ENOMEM, + "No memory for lzma decompression"); + return (ARCHIVE_FATAL); + } + } + + zip->zipx_lzma_stream.next_in = (void*) &alone_header; + zip->zipx_lzma_stream.avail_in = sizeof(alone_header); + zip->zipx_lzma_stream.total_in = 0; + zip->zipx_lzma_stream.next_out = zip->uncompressed_buffer; + zip->zipx_lzma_stream.avail_out = zip->uncompressed_buffer_size; + zip->zipx_lzma_stream.total_out = 0; + + /* Feed only the header into the lzma alone decoder. This will effectively + * initialize the decoder, and will not produce any output bytes yet. 
*/ + r = lzma_code(&zip->zipx_lzma_stream, LZMA_RUN); + if (r != LZMA_OK) { + archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, + "lzma stream initialization error"); + return ARCHIVE_FATAL; + } + + /* We've already consumed some bytes, so take this into account. */ + __archive_read_consume(a, 9); + zip->entry_bytes_remaining -= 9; + zip->entry_compressed_bytes_read += 9; + + zip->decompress_init = 1; + return (ARCHIVE_OK); +} + +static int +zip_read_data_zipx_xz(struct archive_read *a, const void **buff, + size_t *size, int64_t *offset) +{ + struct zip* zip = (struct zip *)(a->format->data); + int ret; + lzma_ret lz_ret; + const void* compressed_buf; + ssize_t bytes_avail, in_bytes, to_consume = 0; + + (void) offset; /* UNUSED */ + + /* Initialize decompressor if not yet initialized. */ + if (!zip->decompress_init) { + ret = zipx_xz_init(a, zip); + if (ret != ARCHIVE_OK) + return (ret); + } + + compressed_buf = __archive_read_ahead(a, 1, &bytes_avail); + if (bytes_avail < 0) { + archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, + "Truncated xz file body"); + return (ARCHIVE_FATAL); + } + + in_bytes = zipmin(zip->entry_bytes_remaining, bytes_avail); + zip->zipx_lzma_stream.next_in = compressed_buf; + zip->zipx_lzma_stream.avail_in = in_bytes; + zip->zipx_lzma_stream.total_in = 0; + zip->zipx_lzma_stream.next_out = zip->uncompressed_buffer; + zip->zipx_lzma_stream.avail_out = zip->uncompressed_buffer_size; + zip->zipx_lzma_stream.total_out = 0; + + /* Perform the decompression. 
*/ + lz_ret = lzma_code(&zip->zipx_lzma_stream, LZMA_RUN); + switch(lz_ret) { + case LZMA_DATA_ERROR: + archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, + "xz data error (error %d)", (int) lz_ret); + return (ARCHIVE_FATAL); + + case LZMA_NO_CHECK: + case LZMA_OK: + break; + + default: + archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, + "xz unknown error %d", (int) lz_ret); + return (ARCHIVE_FATAL); + + case LZMA_STREAM_END: + lzma_end(&zip->zipx_lzma_stream); + zip->zipx_lzma_valid = 0; + + if((int64_t) zip->zipx_lzma_stream.total_in != + zip->entry_bytes_remaining) + { + archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, + "xz premature end of stream"); + return (ARCHIVE_FATAL); + } + + zip->end_of_entry = 1; + break; + } + + to_consume = zip->zipx_lzma_stream.total_in; + + __archive_read_consume(a, to_consume); + zip->entry_bytes_remaining -= to_consume; + zip->entry_compressed_bytes_read += to_consume; + zip->entry_uncompressed_bytes_read += zip->zipx_lzma_stream.total_out; + + *size = zip->zipx_lzma_stream.total_out; + *buff = zip->uncompressed_buffer; + + ret = consume_optional_marker(a, zip); + if (ret != ARCHIVE_OK) + return (ret); + + return (ARCHIVE_OK); +} + +static int +zip_read_data_zipx_lzma_alone(struct archive_read *a, const void **buff, + size_t *size, int64_t *offset) +{ + struct zip* zip = (struct zip *)(a->format->data); + int ret; + lzma_ret lz_ret; + const void* compressed_buf; + ssize_t bytes_avail, in_bytes, to_consume; + + (void) offset; /* UNUSED */ + + /* Initialize decompressor if not yet initialized. */ + if (!zip->decompress_init) { + ret = zipx_lzma_alone_init(a, zip); + if (ret != ARCHIVE_OK) + return (ret); + } + + /* Fetch more compressed data. The same note as in deflate handler applies + * here as well: + * + * Note: '1' here is a performance optimization. Recall that the + * decompression layer returns a count of available bytes; asking for more + * than that forces the decompressor to combine reads by copying data. 
+ */ + compressed_buf = __archive_read_ahead(a, 1, &bytes_avail); + if (bytes_avail < 0) { + archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, + "Truncated lzma file body"); + return (ARCHIVE_FATAL); + } + + /* Set decompressor parameters. */ + in_bytes = zipmin(zip->entry_bytes_remaining, bytes_avail); + + zip->zipx_lzma_stream.next_in = compressed_buf; + zip->zipx_lzma_stream.avail_in = in_bytes; + zip->zipx_lzma_stream.total_in = 0; + zip->zipx_lzma_stream.next_out = zip->uncompressed_buffer; + zip->zipx_lzma_stream.avail_out = + /* These lzma_alone streams lack end of stream marker, so let's make + * sure the unpacker won't try to unpack more than it's supposed to. */ + zipmin((int64_t) zip->uncompressed_buffer_size, + zip->entry->uncompressed_size - + zip->entry_uncompressed_bytes_read); + zip->zipx_lzma_stream.total_out = 0; + + /* Perform the decompression. */ + lz_ret = lzma_code(&zip->zipx_lzma_stream, LZMA_RUN); + switch(lz_ret) { + case LZMA_DATA_ERROR: + archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, + "lzma data error (error %d)", (int) lz_ret); + return (ARCHIVE_FATAL); + + case LZMA_OK: + break; + + default: + archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, + "lzma unknown error %d", (int) lz_ret); + return (ARCHIVE_FATAL); + } + + to_consume = zip->zipx_lzma_stream.total_in; + + /* Update pointers. */ + __archive_read_consume(a, to_consume); + zip->entry_bytes_remaining -= to_consume; + zip->entry_compressed_bytes_read += to_consume; + zip->entry_uncompressed_bytes_read += zip->zipx_lzma_stream.total_out; + + if(zip->entry_bytes_remaining == 0) { + zip->end_of_entry = 1; + } + + /* Return values. */ + *size = zip->zipx_lzma_stream.total_out; + *buff = zip->uncompressed_buffer; + + /* Behave the same way as during deflate decompression. */ + ret = consume_optional_marker(a, zip); + if (ret != ARCHIVE_OK) + return (ret); + + /* Free lzma decoder handle because we'll no longer need it. 
*/ + if(zip->end_of_entry) { + lzma_end(&zip->zipx_lzma_stream); + zip->zipx_lzma_valid = 0; + } + + /* If we're here, then we're good! */ + return (ARCHIVE_OK); +} +#endif /* HAVE_LZMA_H && HAVE_LIBLZMA */ + +static int +zipx_ppmd8_init(struct archive_read *a, struct zip *zip) +{ + const void* p; + uint32_t val; + uint32_t order; + uint32_t mem; + uint32_t restore_method; + + /* Remove previous decompression context if it exists. */ + if(zip->ppmd8_valid) { + __archive_ppmd8_functions.Ppmd8_Free(&zip->ppmd8); + zip->ppmd8_valid = 0; + } + + /* Create a new decompression context. */ + __archive_ppmd8_functions.Ppmd8_Construct(&zip->ppmd8); + + /* Setup function pointers required by Ppmd8 decompressor. The + * 'ppmd_read' function will feed new bytes to the decompressor, + * and will increment the 'zip->zipx_ppmd_read_compressed' counter. */ + zip->ppmd8.Stream.In = &zip->zipx_ppmd_stream; + zip->zipx_ppmd_stream.a = a; + zip->zipx_ppmd_stream.Read = &ppmd_read; + + /* Reset number of read bytes to 0. */ + zip->zipx_ppmd_read_compressed = 0; + + /* Read Ppmd8 header (2 bytes). */ + p = __archive_read_ahead(a, 2, NULL); + if(!p) { + archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, + "Truncated file data in PPMd8 stream"); + return (ARCHIVE_FATAL); + } + __archive_read_consume(a, 2); + + /* Decode the stream's compression parameters. */ + val = archive_le16dec(p); + order = (val & 15) + 1; + mem = ((val >> 4) & 0xff) + 1; + restore_method = (val >> 12); + + if(order < 2 || restore_method > 2) { + archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, + "Invalid parameter set in PPMd8 stream (order=%d, " + "restore=%d)", order, restore_method); + return (ARCHIVE_FAILED); + } + + /* Allocate the memory needed to properly decompress the file. 
*/ + if(!__archive_ppmd8_functions.Ppmd8_Alloc(&zip->ppmd8, mem << 20)) { + archive_set_error(&a->archive, ENOMEM, + "Unable to allocate memory for PPMd8 stream: %d bytes", + mem << 20); + return (ARCHIVE_FATAL); + } + + /* Signal the cleanup function to release Ppmd8 context in the + * cleanup phase. */ + zip->ppmd8_valid = 1; + + /* Perform further Ppmd8 initialization. */ + if(!__archive_ppmd8_functions.Ppmd8_RangeDec_Init(&zip->ppmd8)) { + archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, + "PPMd8 stream range decoder initialization error"); + return (ARCHIVE_FATAL); + } + + __archive_ppmd8_functions.Ppmd8_Init(&zip->ppmd8, order, restore_method); + + /* Allocate the buffer that will hold uncompressed data. */ + free(zip->uncompressed_buffer); + + zip->uncompressed_buffer_size = 256 * 1024; + zip->uncompressed_buffer = + (uint8_t*) malloc(zip->uncompressed_buffer_size); + + if(zip->uncompressed_buffer == NULL) { + archive_set_error(&a->archive, ENOMEM, + "No memory for PPMd8 decompression"); + return ARCHIVE_FATAL; + } + + /* Ppmd8 initialization is done. */ + zip->decompress_init = 1; + + /* We've already read 2 bytes in the output stream. Additionally, + * Ppmd8 initialization code could read some data as well. So we + * are advancing the stream by 2 bytes plus whatever number of + * bytes Ppmd8 init function used. */ + zip->entry_compressed_bytes_read += 2 + zip->zipx_ppmd_read_compressed; + + return ARCHIVE_OK; +} + +static int +zip_read_data_zipx_ppmd(struct archive_read *a, const void **buff, + size_t *size, int64_t *offset) +{ + struct zip* zip = (struct zip *)(a->format->data); + int ret; + size_t consumed_bytes = 0; + ssize_t bytes_avail = 0; + + (void) offset; /* UNUSED */ + + /* If we're here for the first time, initialize Ppmd8 decompression + * context first. */ + if(!zip->decompress_init) { + ret = zipx_ppmd8_init(a, zip); + if(ret != ARCHIVE_OK) + return ret; + } + + /* Fetch for more data. 
We're reading 1 byte here, but libarchive should + * prefetch more bytes. */ + (void) __archive_read_ahead(a, 1, &bytes_avail); + if(bytes_avail < 0) { + archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, + "Truncated PPMd8 file body"); + return (ARCHIVE_FATAL); + } + + /* This counter will be updated inside ppmd_read(), which at one + * point will be called by Ppmd8_DecodeSymbol. */ + zip->zipx_ppmd_read_compressed = 0; + + /* Decompression loop. */ + do { + int sym = __archive_ppmd8_functions.Ppmd8_DecodeSymbol(&zip->ppmd8); + if(sym < 0) { + zip->end_of_entry = 1; + break; + } + + zip->uncompressed_buffer[consumed_bytes] = (uint8_t) sym; + ++consumed_bytes; + } while(consumed_bytes < zip->uncompressed_buffer_size); + + /* Update pointers for libarchive. */ + *buff = zip->uncompressed_buffer; + *size = consumed_bytes; + + /* Update pointers so we can continue decompression in another call. */ + zip->entry_bytes_remaining -= zip->zipx_ppmd_read_compressed; + zip->entry_compressed_bytes_read += zip->zipx_ppmd_read_compressed; + zip->entry_uncompressed_bytes_read += consumed_bytes; + + /* If we're at the end of stream, deinitialize Ppmd8 context. */ + if(zip->end_of_entry) { + __archive_ppmd8_functions.Ppmd8_Free(&zip->ppmd8); + zip->ppmd8_valid = 0; + } + + /* Seek for optional marker, same way as in each zip entry. */ + ret = consume_optional_marker(a, zip); + if (ret != ARCHIVE_OK) + return ret; + + return ARCHIVE_OK; +} + +#ifdef HAVE_BZLIB_H +static int +zipx_bzip2_init(struct archive_read *a, struct zip *zip) +{ + int r; + + /* Deallocate already existing BZ2 decompression context if it + * exists. */ + if(zip->bzstream_valid) { + BZ2_bzDecompressEnd(&zip->bzstream); + zip->bzstream_valid = 0; + } + + /* Allocate a new BZ2 decompression context. 
*/ + memset(&zip->bzstream, 0, sizeof(bz_stream)); + r = BZ2_bzDecompressInit(&zip->bzstream, 0, 1); + if(r != BZ_OK) { + archive_set_error(&(a->archive), ARCHIVE_ERRNO_MISC, + "bzip2 initialization failed(%d)", + r); + + return ARCHIVE_FAILED; + } + + /* Mark the bzstream field to be released in cleanup phase. */ + zip->bzstream_valid = 1; + + /* (Re)allocate the buffer that will contain decompressed bytes. */ + free(zip->uncompressed_buffer); + + zip->uncompressed_buffer_size = 256 * 1024; + zip->uncompressed_buffer = + (uint8_t*) malloc(zip->uncompressed_buffer_size); + if (zip->uncompressed_buffer == NULL) { + archive_set_error(&a->archive, ENOMEM, + "No memory for bzip2 decompression"); + return ARCHIVE_FATAL; + } + + /* Initialization done. */ + zip->decompress_init = 1; + return ARCHIVE_OK; +} + +static int +zip_read_data_zipx_bzip2(struct archive_read *a, const void **buff, + size_t *size, int64_t *offset) +{ + struct zip *zip = (struct zip *)(a->format->data); + ssize_t bytes_avail = 0, in_bytes, to_consume; + const void *compressed_buff; + int r; + uint64_t total_out; + + (void) offset; /* UNUSED */ + + /* Initialize decompression context if we're here for the first time. */ + if(!zip->decompress_init) { + r = zipx_bzip2_init(a, zip); + if(r != ARCHIVE_OK) + return r; + } + + /* Fetch more compressed bytes. */ + compressed_buff = __archive_read_ahead(a, 1, &bytes_avail); + if(bytes_avail < 0) { + archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, + "Truncated bzip2 file body"); + return (ARCHIVE_FATAL); + } + + in_bytes = zipmin(zip->entry_bytes_remaining, bytes_avail); + + /* Setup buffer boundaries. 
*/ + zip->bzstream.next_in = (char*)(uintptr_t) compressed_buff; + zip->bzstream.avail_in = in_bytes; + zip->bzstream.total_in_hi32 = 0; + zip->bzstream.total_in_lo32 = 0; + zip->bzstream.next_out = (char*) zip->uncompressed_buffer; + zip->bzstream.avail_out = zip->uncompressed_buffer_size; + zip->bzstream.total_out_hi32 = 0; + zip->bzstream.total_out_lo32 = 0; + + /* Perform the decompression. */ + r = BZ2_bzDecompress(&zip->bzstream); + switch(r) { + case BZ_STREAM_END: + /* If we're at the end of the stream, deinitialize the + * decompression context now. */ + switch(BZ2_bzDecompressEnd(&zip->bzstream)) { + case BZ_OK: + break; + default: + archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, + "Failed to clean up bzip2 decompressor"); + return ARCHIVE_FATAL; + } + + zip->end_of_entry = 1; + break; + case BZ_OK: + /* The decompressor has successfully decoded this chunk of + * data, but more data is still in queue. */ + break; + default: + archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, + "bzip2 decompression failed"); + return ARCHIVE_FATAL; + } + + /* Update the pointers so decompressor can continue decoding. */ + to_consume = zip->bzstream.total_in_lo32; + __archive_read_consume(a, to_consume); + + total_out = ((uint64_t) zip->bzstream.total_out_hi32 << 32) + + zip->bzstream.total_out_lo32; + + zip->entry_bytes_remaining -= to_consume; + zip->entry_compressed_bytes_read += to_consume; + zip->entry_uncompressed_bytes_read += total_out; + + /* Give libarchive its due. */ + *size = total_out; + *buff = zip->uncompressed_buffer; + + /* Seek for optional marker, like in other entries. 
*/ + r = consume_optional_marker(a, zip); + if(r != ARCHIVE_OK) + return r; + + return ARCHIVE_OK; +} + +#endif + #ifdef HAVE_ZLIB_H static int zip_deflate_init(struct archive_read *a, struct zip *zip) @@ -1470,42 +2205,9 @@ zip_read_data_deflate(struct archive_read *a, const void **buff, return (r); } - if (zip->end_of_entry && (zip->entry->zip_flags & ZIP_LENGTH_AT_END)) { - const char *p; - - if (NULL == (p = __archive_read_ahead(a, 24, NULL))) { - archive_set_error(&a->archive, - ARCHIVE_ERRNO_FILE_FORMAT, - "Truncated ZIP end-of-file record"); - return (ARCHIVE_FATAL); - } - /* Consume the optional PK\007\010 marker. */ - if (p[0] == 'P' && p[1] == 'K' && - p[2] == '\007' && p[3] == '\010') { - p += 4; - zip->unconsumed = 4; - } - if (zip->entry->flags & LA_USED_ZIP64) { - uint64_t compressed, uncompressed; - zip->entry->crc32 = archive_le32dec(p); - compressed = archive_le64dec(p + 4); - uncompressed = archive_le64dec(p + 12); - if (compressed > INT64_MAX || uncompressed > INT64_MAX) { - archive_set_error(&a->archive, - ARCHIVE_ERRNO_FILE_FORMAT, - "Overflow of 64-bit file sizes"); - return ARCHIVE_FAILED; - } - zip->entry->compressed_size = compressed; - zip->entry->uncompressed_size = uncompressed; - zip->unconsumed += 20; - } else { - zip->entry->crc32 = archive_le32dec(p); - zip->entry->compressed_size = archive_le32dec(p + 4); - zip->entry->uncompressed_size = archive_le32dec(p + 8); - zip->unconsumed += 12; - } - } + r = consume_optional_marker(a, zip); + if (r != ARCHIVE_OK) + return (r); return (ARCHIVE_OK); } @@ -1933,6 +2635,24 @@ archive_read_format_zip_read_data(struct archive_read *a, case 0: /* No compression. */ r = zip_read_data_none(a, buff, size, offset); break; +#ifdef HAVE_BZLIB_H + case 12: /* ZIPx bzip2 compression. */ + r = zip_read_data_zipx_bzip2(a, buff, size, offset); + break; +#endif +#if HAVE_LZMA_H && HAVE_LIBLZMA + case 14: /* ZIPx LZMA compression. 
*/ + r = zip_read_data_zipx_lzma_alone(a, buff, size, offset); + break; + case 95: /* ZIPx XZ compression. */ + r = zip_read_data_zipx_xz(a, buff, size, offset); + break; +#endif + /* PPMd support is built-in, so we don't need any #if guards. */ + case 98: /* ZIPx PPMd compression. */ + r = zip_read_data_zipx_ppmd(a, buff, size, offset); + break; + #ifdef HAVE_ZLIB_H case 8: /* Deflate compression. */ r = zip_read_data_deflate(a, buff, size, offset); @@ -1941,8 +2661,8 @@ archive_read_format_zip_read_data(struct archive_read *a, default: /* Unsupported compression. */ /* Return a warning. */ archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, - "Unsupported ZIP compression method (%s)", - compression_name(zip->entry->compression)); + "Unsupported ZIP compression method (%d: %s)", + zip->entry->compression, compression_name(zip->entry->compression)); /* We can't decompress this entry, but we will * be able to skip() it and try the next entry. */ return (ARCHIVE_FAILED); @@ -2000,11 +2720,29 @@ archive_read_format_zip_cleanup(struct archive_read *a) struct zip_entry *zip_entry, *next_zip_entry; zip = (struct zip *)(a->format->data); + #ifdef HAVE_ZLIB_H if (zip->stream_valid) inflateEnd(&zip->stream); - free(zip->uncompressed_buffer); #endif + +#if HAVE_LZMA_H && HAVE_LIBLZMA + if (zip->zipx_lzma_valid) { + lzma_end(&zip->zipx_lzma_stream); + } +#endif + +#ifdef HAVE_BZLIB_H + if (zip->bzstream_valid) { + BZ2_bzDecompressEnd(&zip->bzstream); + } +#endif + + free(zip->uncompressed_buffer); + + if (zip->ppmd8_valid) + __archive_ppmd8_functions.Ppmd8_Free(&zip->ppmd8); + if (zip->zip_entries) { zip_entry = zip->zip_entries; while (zip_entry != NULL) { @@ -3146,3 +3884,5 @@ archive_read_support_format_zip_seekable(struct archive *_a) free(zip); return (ARCHIVE_OK); } + +/*# vim:set noet:*/ diff --git a/libarchive/archive_write_disk_posix.c b/libarchive/archive_write_disk_posix.c index 003e17d7736..3ed281df23f 100644 --- a/libarchive/archive_write_disk_posix.c +++
b/libarchive/archive_write_disk_posix.c @@ -1791,10 +1791,8 @@ finish_metadata: a->fd = -1; } /* If there's an entry, we can release it now. */ - if (a->entry) { - archive_entry_free(a->entry); - a->entry = NULL; - } + archive_entry_free(a->entry); + a->entry = NULL; a->archive.state = ARCHIVE_STATE_HEADER; return (ret); } @@ -2398,8 +2396,7 @@ _archive_write_disk_free(struct archive *_a) ret = _archive_write_disk_close(&a->archive); archive_write_disk_set_group_lookup(&a->archive, NULL, NULL, NULL); archive_write_disk_set_user_lookup(&a->archive, NULL, NULL, NULL); - if (a->entry) - archive_entry_free(a->entry); + archive_entry_free(a->entry); archive_string_free(&a->_name_data); archive_string_free(&a->archive.error_string); archive_string_free(&a->path_safe); diff --git a/libarchive/archive_write_disk_set_standard_lookup.c b/libarchive/archive_write_disk_set_standard_lookup.c index 5c766d75dd0..5fccdb9dc65 100644 --- a/libarchive/archive_write_disk_set_standard_lookup.c +++ b/libarchive/archive_write_disk_set_standard_lookup.c @@ -114,8 +114,7 @@ lookup_gid(void *private_data, const char *gname, int64_t gid) return ((gid_t)b->id); /* Free the cache slot for a new entry. */ - if (b->name != NULL) - free(b->name); + free(b->name); b->name = strdup(gname); /* Note: If strdup fails, that's okay; we just won't cache. */ b->hash = h; @@ -184,8 +183,7 @@ lookup_uid(void *private_data, const char *uname, int64_t uid) return ((uid_t)b->id); /* Free the cache slot for a new entry. */ - if (b->name != NULL) - free(b->name); + free(b->name); b->name = strdup(uname); /* Note: If strdup fails, that's okay; we just won't cache. 
*/ b->hash = h; diff --git a/libarchive/archive_write_disk_windows.c b/libarchive/archive_write_disk_windows.c index 78eda4abc8d..135dd97eaca 100644 --- a/libarchive/archive_write_disk_windows.c +++ b/libarchive/archive_write_disk_windows.c @@ -696,10 +696,8 @@ _archive_write_disk_header(struct archive *_a, struct archive_entry *entry) a->pst = NULL; a->current_fixup = NULL; a->deferred = 0; - if (a->entry) { - archive_entry_free(a->entry); - a->entry = NULL; - } + archive_entry_free(a->entry); + a->entry = NULL; a->entry = archive_entry_clone(entry); a->fh = INVALID_HANDLE_VALUE; a->fd_offset = 0; @@ -1145,10 +1143,8 @@ _archive_write_disk_finish_entry(struct archive *_a) a->fh = INVALID_HANDLE_VALUE; } /* If there's an entry, we can release it now. */ - if (a->entry) { - archive_entry_free(a->entry); - a->entry = NULL; - } + archive_entry_free(a->entry); + a->entry = NULL; a->archive.state = ARCHIVE_STATE_HEADER; return (ret); } @@ -1690,8 +1686,7 @@ _archive_write_disk_free(struct archive *_a) ret = _archive_write_disk_close(&a->archive); archive_write_disk_set_group_lookup(&a->archive, NULL, NULL, NULL); archive_write_disk_set_user_lookup(&a->archive, NULL, NULL, NULL); - if (a->entry) - archive_entry_free(a->entry); + archive_entry_free(a->entry); archive_wstring_free(&a->_name_data); archive_string_free(&a->archive.error_string); archive_wstring_free(&a->path_safe); diff --git a/libarchive/archive_write_set_format_ar.c b/libarchive/archive_write_set_format_ar.c index 50305ccbeda..253cac82efe 100644 --- a/libarchive/archive_write_set_format_ar.c +++ b/libarchive/archive_write_set_format_ar.c @@ -187,6 +187,11 @@ archive_write_ar_header(struct archive_write *a, struct archive_entry *entry) buff[AR_name_offset] = '/'; goto stat; } + if (strcmp(pathname, "/SYM64/") == 0) { + /* Entry is archive symbol table in GNU 64-bit format */ + memcpy(buff + AR_name_offset, "/SYM64/", 7); + goto stat; + } if (strcmp(pathname, "__.SYMDEF") == 0) { /* Entry is archive symbol 
table in BSD format */ memcpy(buff + AR_name_offset, "__.SYMDEF", 9); diff --git a/libarchive/archive_write_set_format_cpio.c b/libarchive/archive_write_set_format_cpio.c index a4c9d1ed276..16cefad7b5b 100644 --- a/libarchive/archive_write_set_format_cpio.c +++ b/libarchive/archive_write_set_format_cpio.c @@ -408,8 +408,7 @@ write_header(struct archive_write *a, struct archive_entry *entry) } } exit_write_header: - if (entry_main) - archive_entry_free(entry_main); + archive_entry_free(entry_main); return (ret_final); } diff --git a/libarchive/archive_write_set_format_cpio_newc.c b/libarchive/archive_write_set_format_cpio_newc.c index 957f1a333a6..2d923cc3306 100644 --- a/libarchive/archive_write_set_format_cpio_newc.c +++ b/libarchive/archive_write_set_format_cpio_newc.c @@ -366,8 +366,7 @@ write_header(struct archive_write *a, struct archive_entry *entry) } } exit_write_header: - if (entry_main) - archive_entry_free(entry_main); + archive_entry_free(entry_main); return (ret_final); } diff --git a/libarchive/archive_write_set_format_gnutar.c b/libarchive/archive_write_set_format_gnutar.c index 1966c53fff1..e7757c22bad 100644 --- a/libarchive/archive_write_set_format_gnutar.c +++ b/libarchive/archive_write_set_format_gnutar.c @@ -565,8 +565,7 @@ archive_write_gnutar_header(struct archive_write *a, gnutar->entry_bytes_remaining = archive_entry_size(entry); gnutar->entry_padding = 0x1ff & (-(int64_t)gnutar->entry_bytes_remaining); exit_write_header: - if (entry_main) - archive_entry_free(entry_main); + archive_entry_free(entry_main); return (ret); } diff --git a/libarchive/archive_write_set_format_shar.c b/libarchive/archive_write_set_format_shar.c index 5be310a0781..600c88257a0 100644 --- a/libarchive/archive_write_set_format_shar.c +++ b/libarchive/archive_write_set_format_shar.c @@ -169,8 +169,7 @@ archive_write_shar_header(struct archive_write *a, struct archive_entry *entry) } /* Save the entry for the closing. 
*/ - if (shar->entry) - archive_entry_free(shar->entry); + archive_entry_free(shar->entry); shar->entry = archive_entry_clone(entry); name = archive_entry_pathname(entry); @@ -289,8 +288,7 @@ archive_write_shar_header(struct archive_write *a, struct archive_entry *entry) "mkdir -p %s > /dev/null 2>&1\n", shar->quoted_name.s); /* Record that we just created this directory. */ - if (shar->last_dir != NULL) - free(shar->last_dir); + free(shar->last_dir); shar->last_dir = strdup(name); /* Trim a trailing '/'. */ diff --git a/libarchive/archive_write_set_format_ustar.c b/libarchive/archive_write_set_format_ustar.c index c54aeabdb19..ad4ccb77ea5 100644 --- a/libarchive/archive_write_set_format_ustar.c +++ b/libarchive/archive_write_set_format_ustar.c @@ -352,14 +352,12 @@ archive_write_ustar_header(struct archive_write *a, struct archive_entry *entry) #endif ret = __archive_write_format_header_ustar(a, buff, entry, -1, 1, sconv); if (ret < ARCHIVE_WARN) { - if (entry_main) - archive_entry_free(entry_main); + archive_entry_free(entry_main); return (ret); } ret2 = __archive_write_output(a, buff, 512); if (ret2 < ARCHIVE_WARN) { - if (entry_main) - archive_entry_free(entry_main); + archive_entry_free(entry_main); return (ret2); } if (ret2 < ret) @@ -367,8 +365,7 @@ archive_write_ustar_header(struct archive_write *a, struct archive_entry *entry) ustar->entry_bytes_remaining = archive_entry_size(entry); ustar->entry_padding = 0x1ff & (-(int64_t)ustar->entry_bytes_remaining); - if (entry_main) - archive_entry_free(entry_main); + archive_entry_free(entry_main); return (ret); } diff --git a/libarchive/archive_write_set_format_v7tar.c b/libarchive/archive_write_set_format_v7tar.c index 53c0db0e204..1fdaafd2a93 100644 --- a/libarchive/archive_write_set_format_v7tar.c +++ b/libarchive/archive_write_set_format_v7tar.c @@ -330,14 +330,12 @@ archive_write_v7tar_header(struct archive_write *a, struct archive_entry *entry) #endif ret = format_header_v7tar(a, buff, entry, 1, sconv); if 
(ret < ARCHIVE_WARN) { - if (entry_main) - archive_entry_free(entry_main); + archive_entry_free(entry_main); return (ret); } ret2 = __archive_write_output(a, buff, 512); if (ret2 < ARCHIVE_WARN) { - if (entry_main) - archive_entry_free(entry_main); + archive_entry_free(entry_main); return (ret2); } if (ret2 < ret) @@ -345,8 +343,7 @@ archive_write_v7tar_header(struct archive_write *a, struct archive_entry *entry) v7tar->entry_bytes_remaining = archive_entry_size(entry); v7tar->entry_padding = 0x1ff & (-(int64_t)v7tar->entry_bytes_remaining); - if (entry_main) - archive_entry_free(entry_main); + archive_entry_free(entry_main); return (ret); } diff --git a/libarchive/archive_write_set_format_zip.c b/libarchive/archive_write_set_format_zip.c index f69b8467f44..7fcd1a07b3f 100644 --- a/libarchive/archive_write_set_format_zip.c +++ b/libarchive/archive_write_set_format_zip.c @@ -564,10 +564,8 @@ archive_write_zip_header(struct archive_write *a, struct archive_entry *entry) zip->entry_uses_zip64 = 0; zip->entry_crc32 = zip->crc32func(0, NULL, 0); zip->entry_encryption = 0; - if (zip->entry != NULL) { - archive_entry_free(zip->entry); - zip->entry = NULL; - } + archive_entry_free(zip->entry); + zip->entry = NULL; if (zip->cctx_valid) archive_encrypto_aes_ctr_release(&zip->cctx); @@ -1430,6 +1428,9 @@ write_path(struct archive_entry *entry, struct archive_write *archive) type = archive_entry_filetype(entry); written_bytes = 0; + if (path == NULL) + return (ARCHIVE_FATAL); + ret = __archive_write_output(archive, path, strlen(path)); if (ret != ARCHIVE_OK) return (ARCHIVE_FATAL); diff --git a/libarchive/test/CMakeLists.txt b/libarchive/test/CMakeLists.txt index 3927748576c..690a83c5cd6 100644 --- a/libarchive/test/CMakeLists.txt +++ b/libarchive/test/CMakeLists.txt @@ -58,7 +58,6 @@ IF(ENABLE_TEST) test_compat_lzma.c test_compat_lzop.c test_compat_mac.c - test_compat_pax_libarchive_2x.c test_compat_perl_archive_tar.c test_compat_plexus_archiver_tar.c 
test_compat_solaris_pax_sparse.c diff --git a/libarchive/test/test_compat_pax_libarchive_2x.c b/libarchive/test/test_compat_pax_libarchive_2x.c deleted file mode 100644 index 4830d9e43a1..00000000000 --- a/libarchive/test/test_compat_pax_libarchive_2x.c +++ /dev/null @@ -1,153 +0,0 @@ -/*- - * Copyright (c) 2011 Michihiro NAKAJIMA - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ -#include "test.h" -__FBSDID("$FreeBSD"); - -#include - -/* - * Test "tar:compat-2x" option that enables the string conversion of - * libarchive 2.x, which made incorrect UTF-8 form filenames for the - * pax format on some platform the wchar_t of which was not Unicode form. 
- * The option is unneeded if people have been using UTF-8 locale during - * making tar files(in pax format). - * - * NOTE: The sample tar file was made with bsdtar 2.x in LANG=KOI8-R on - * FreeBSD. - */ - -DEFINE_TEST(test_compat_pax_libarchive_2x) -{ -#if (defined(_WIN32) && !defined(__CYGWIN__)) \ - || defined(__STDC_ISO_10646__) || defined(__APPLE__) - skipping("This test only for the platform the WCS of which is " - "not Unicode."); -#else - struct archive *a; - struct archive_entry *ae; - char c; - wchar_t wc; - const char *refname = "test_compat_pax_libarchive_2x.tar.Z"; - - /* - * Read incorrect format UTF-8 filename in ru_RU.KOI8-R with - * "tar:compat-2x" option. We should correctly - * read two filenames. - */ - if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) { - skipping("ru_RU.KOI8-R locale not available on this system."); - return; - } - - /* - * Test if wchar_t format is the same as FreeBSD wchar_t. - */ - assert(-1 != wctomb(NULL, L'\0')); - wc = (wchar_t)0xd0; - c = 0; - if (wctomb(&c, wc) != 1 || (unsigned char)c != 0xd0) { - skipping("wchar_t format is different on this platform."); - return; - } - - extract_reference_file(refname); - - assert((a = archive_read_new()) != NULL); - assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a)); - assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a)); - assertEqualIntA(a, ARCHIVE_OK, - archive_read_set_options(a, "tar:compat-2x")); - assertEqualIntA(a, ARCHIVE_OK, - archive_read_open_filename(a, refname, 10240)); - - /* Verify regular first file. */ - assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); - assertEqualString("\xd0\xd2\xc9\xd7\xc5\xd4", - archive_entry_pathname(ae)); - assertEqualInt(6, archive_entry_size(ae)); - - /* Verify regular second file. 
*/ - assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); - assertEqualString("\xf0\xf2\xe9\xf7\xe5\xf4", - archive_entry_pathname(ae)); - assertEqualInt(6, archive_entry_size(ae)); - - - /* End of archive. */ - assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae)); - - /* Verify archive format. */ - assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0)); - assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE, - archive_format(a)); - - /* Close the archive. */ - assertEqualInt(ARCHIVE_OK, archive_read_close(a)); - assertEqualInt(ARCHIVE_OK, archive_read_free(a)); - - /* - * Without "tar:compat-2x" option. - * Neither first file name nor second file name can be translated - * to KOI8-R. - */ - assert((a = archive_read_new()) != NULL); - assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a)); - assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a)); - assertEqualIntA(a, ARCHIVE_OK, - archive_read_open_filename(a, refname, 10240)); - - /* We cannot correctly read the filename. */ - // This test used to look for WARN here coming from a - // character-conversion failure. But: Newer iconv tables are - // more tolerant so we can't always detect the conversion - // failures. - assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); - assert(strcmp("\xd0\xd2\xc9\xd7\xc5\xd4", - archive_entry_pathname(ae)) != 0); - assertEqualInt(6, archive_entry_size(ae)); - - /* We cannot correctly read the filename. */ - // Same here: The test is still valid (it sill verifies that - // the converted pathname is different), but we can no longer - // rely on WARN here. - assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); - assert(strcmp("\xf0\xf2\xe9\xf7\xe5\xf4", - archive_entry_pathname(ae)) != 0); - assertEqualInt(6, archive_entry_size(ae)); - - - /* End of archive. */ - assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae)); - - /* Verify archive format. 
*/ - assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0)); - assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE, - archive_format(a)); - - /* Close the archive. */ - assertEqualInt(ARCHIVE_OK, archive_read_close(a)); - assertEqualInt(ARCHIVE_OK, archive_read_free(a)); -#endif -} diff --git a/libarchive/test/test_compat_pax_libarchive_2x.tar.Z.uu b/libarchive/test/test_compat_pax_libarchive_2x.tar.Z.uu deleted file mode 100644 index f44054118c1..00000000000 --- a/libarchive/test/test_compat_pax_libarchive_2x.tar.Z.uu +++ /dev/null @@ -1,15 +0,0 @@ -begin 644 test_compat_pax_libarchive_2x.tar.Z -M'YV04,+@05(F#)DR3.FQ94D;-D#$B%&#QHT9,CS.B`DCADT;-P"`P,.QJ-&C2)-:K#.' -M3A@Y)&&,J5-&:<:I5:U>=.F1IV5P]@Q:=!HW;P87IE'C!@W-.%9_#NW"#9O7:P8K,$N\ -MN/'CR),K7\Z\N?.)"QL^?$[=:$N0(J.:K(%2^\JH7%O&G%GS9DX8.T'T+`PC -MJ/KJ\)$R=0K5(];B]XF']]H2;/S_`"H'CSSIW%,./0$FJ.""##;HX(,01@A` -M0`,5=%!"`Q9XH(3Z?1022]MUIQ)W_+D4`TPRT6033CKQ1),,0`E%%(?PS?=4 -M5/F9E6-9^X'7'XU`-H@6"&JQY18LP\@R3"K#W#),*.*E6!Z+Z+DX0V'NP4!HJ,<%NB&IJ*:JZJJLMNKJJ[#&*NNLM-9JZZVXYDH< -` -end diff --git a/libarchive/test/test_read_format_zip.c b/libarchive/test/test_read_format_zip.c index 6cc25a6676f..b965299a4f5 100644 --- a/libarchive/test/test_read_format_zip.c +++ b/libarchive/test/test_read_format_zip.c @@ -26,6 +26,76 @@ #include "test.h" __FBSDID("$FreeBSD: head/lib/libarchive/test/test_read_format_zip.c 189482 2009-03-07 03:30:35Z kientzle $"); +#define __LIBARCHIVE_BUILD +#include + +static +int extract_one(struct archive* a, struct archive_entry* ae, uint32_t crc) +{ + la_ssize_t fsize, bytes_read; + uint8_t* buf; + int ret = 1; + uint32_t computed_crc; + + fsize = archive_entry_size(ae); + buf = malloc(fsize); + if(buf == NULL) + return 1; + + bytes_read = archive_read_data(a, buf, fsize); + if(bytes_read != fsize) { + assertEqualInt(bytes_read, fsize); + goto fn_exit; + } + + computed_crc = crc32(0, buf, fsize); + assertEqualInt(computed_crc, crc); + ret = 0; + +fn_exit: + free(buf); + return ret; 
+} + +static +int extract_one_using_blocks(struct archive* a, int block_size, uint32_t crc) +{ + uint8_t* buf; + int ret = 1; + uint32_t computed_crc = 0; + la_ssize_t bytes_read; + + buf = malloc(block_size); + if(buf == NULL) + return 1; + + while(1) { + bytes_read = archive_read_data(a, buf, block_size); + if(bytes_read == ARCHIVE_RETRY) + continue; + else if(bytes_read == 0) + break; + else if(bytes_read < 0) { + /* If we're here, it means the decompressor has failed + * to properly decode test file. */ + assertA(0); + ret = 1; + goto fn_exit; + } else { + /* ok */ + } + + computed_crc = crc32(computed_crc, buf, bytes_read); + } + + assertEqualInt(computed_crc, crc); + ret = 0; + +fn_exit: + free(buf); + return ret; +} + /* * The reference file for this has been manually tweaked so that: * * file2 has length-at-end but file1 does not @@ -312,3 +382,380 @@ DEFINE_TEST(test_read_format_zip) test_extract_length_at_end(); test_symlink(); } + +DEFINE_TEST(test_read_format_zip_ppmd_one_file) +{ + const char *refname = "test_read_format_zip_ppmd8.zipx"; + struct archive *a; + struct archive_entry *ae; + + extract_reference_file(refname); + + assert((a = archive_read_new()) != NULL); + assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_zip(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_open_filename(a, refname, 37)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); + assertEqualString("ZIP 6.3 (ppmd-1)", archive_format_name(a)); + assertEqualString("vimrc", archive_entry_pathname(ae)); + assertEqualIntA(a, 0, extract_one(a, ae, 0xBA8E3BAA)); + assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_close(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_free(a)); +} + +DEFINE_TEST(test_read_format_zip_ppmd_one_file_blockread) +{ + const char *refname = "test_read_format_zip_ppmd8.zipx"; + struct archive *a; + struct archive_entry *ae; + + extract_reference_file(refname); + + 
assert((a = archive_read_new()) != NULL); + assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_zip(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_open_filename(a, refname, 37)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); + assertEqualString("ZIP 6.3 (ppmd-1)", archive_format_name(a)); + assertEqualString("vimrc", archive_entry_pathname(ae)); + assertEqualIntA(a, 0, extract_one_using_blocks(a, 13, 0xBA8E3BAA)); + assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_close(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_free(a)); +} + +DEFINE_TEST(test_read_format_zip_ppmd_multi) +{ + const char *refname = "test_read_format_zip_ppmd8_multi.zipx"; + struct archive *a; + struct archive_entry *ae; + + extract_reference_file(refname); + + assert((a = archive_read_new()) != NULL); + assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_zip(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_open_filename(a, refname, 37)); + + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); + assertEqualString("ZIP 6.3 (ppmd-1)", archive_format_name(a)); + assertEqualString("smartd.conf", archive_entry_pathname(ae)); + assertEqualIntA(a, 0, extract_one(a, ae, 0x8DD7379E)); + + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); + assertEqualString("ZIP 6.3 (ppmd-1)", archive_format_name(a)); + assertEqualString("ts.conf", archive_entry_pathname(ae)); + assertEqualIntA(a, 0, extract_one(a, ae, 0x7AE59B31)); + + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); + assertEqualString("ZIP 6.3 (ppmd-1)", archive_format_name(a)); + assertEqualString("vimrc", archive_entry_pathname(ae)); + assertEqualIntA(a, 0, extract_one(a, ae, 0xBA8E3BAA)); + + assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_close(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_free(a)); +} + 
+DEFINE_TEST(test_read_format_zip_ppmd_multi_blockread) +{ + const char *refname = "test_read_format_zip_ppmd8_multi.zipx"; + struct archive *a; + struct archive_entry *ae; + + extract_reference_file(refname); + + assert((a = archive_read_new()) != NULL); + assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_zip(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_open_filename(a, refname, 37)); + + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); + assertEqualString("ZIP 6.3 (ppmd-1)", archive_format_name(a)); + assertEqualString("smartd.conf", archive_entry_pathname(ae)); + assertEqualIntA(a, 0, extract_one_using_blocks(a, 12, 0x8DD7379E)); + + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); + assertEqualString("ZIP 6.3 (ppmd-1)", archive_format_name(a)); + assertEqualString("ts.conf", archive_entry_pathname(ae)); + assertEqualIntA(a, 0, extract_one_using_blocks(a, 13, 0x7AE59B31)); + + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); + assertEqualString("ZIP 6.3 (ppmd-1)", archive_format_name(a)); + assertEqualString("vimrc", archive_entry_pathname(ae)); + assertEqualIntA(a, 0, extract_one_using_blocks(a, 14, 0xBA8E3BAA)); + + assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_close(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_free(a)); +} + +DEFINE_TEST(test_read_format_zip_lzma_one_file) +{ + const char *refname = "test_read_format_zip_lzma.zipx"; + struct archive *a; + struct archive_entry *ae; + + extract_reference_file(refname); + + assert((a = archive_read_new()) != NULL); + assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_zip(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_open_filename(a, refname, 37)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); + assertEqualString("ZIP 6.3 (lzma)", archive_format_name(a)); + assertEqualString("vimrc", archive_entry_pathname(ae)); + assertEqualIntA(a, 0, 
extract_one(a, ae, 0xBA8E3BAA)); + assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_close(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_free(a)); +} + +DEFINE_TEST(test_read_format_zip_lzma_one_file_blockread) +{ + const char *refname = "test_read_format_zip_lzma.zipx"; + struct archive *a; + struct archive_entry *ae; + + extract_reference_file(refname); + + assert((a = archive_read_new()) != NULL); + assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_zip(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_open_filename(a, refname, 37)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); + assertEqualString("ZIP 6.3 (lzma)", archive_format_name(a)); + assertEqualString("vimrc", archive_entry_pathname(ae)); + assertEqualIntA(a, 0, extract_one_using_blocks(a, 13, 0xBA8E3BAA)); + assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_close(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_free(a)); +} + +DEFINE_TEST(test_read_format_zip_lzma_multi) +{ + const char *refname = "test_read_format_zip_lzma_multi.zipx"; + struct archive *a; + struct archive_entry *ae; + + extract_reference_file(refname); + + assert((a = archive_read_new()) != NULL); + assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_zip(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_open_filename(a, refname, 37)); + + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); + assertEqualString("ZIP 6.3 (lzma)", archive_format_name(a)); + assertEqualString("smartd.conf", archive_entry_pathname(ae)); + assertEqualIntA(a, 0, extract_one(a, ae, 0x8DD7379E)); + + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); + assertEqualString("ZIP 6.3 (lzma)", archive_format_name(a)); + assertEqualString("ts.conf", archive_entry_pathname(ae)); + assertEqualIntA(a, 0, extract_one(a, ae, 0x7AE59B31)); + + assertEqualIntA(a, ARCHIVE_OK, 
archive_read_next_header(a, &ae)); + assertEqualString("ZIP 6.3 (lzma)", archive_format_name(a)); + assertEqualString("vimrc", archive_entry_pathname(ae)); + assertEqualIntA(a, 0, extract_one(a, ae, 0xBA8E3BAA)); + + assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_close(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_free(a)); +} + +DEFINE_TEST(test_read_format_zip_lzma_multi_blockread) +{ + const char *refname = "test_read_format_zip_lzma_multi.zipx"; + struct archive *a; + struct archive_entry *ae; + + extract_reference_file(refname); + + assert((a = archive_read_new()) != NULL); + assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_zip(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_open_filename(a, refname, 37)); + + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); + assertEqualString("ZIP 6.3 (lzma)", archive_format_name(a)); + assertEqualString("smartd.conf", archive_entry_pathname(ae)); + assertEqualIntA(a, 0, extract_one_using_blocks(a, 12, 0x8DD7379E)); + + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); + assertEqualString("ZIP 6.3 (lzma)", archive_format_name(a)); + assertEqualString("ts.conf", archive_entry_pathname(ae)); + assertEqualIntA(a, 0, extract_one_using_blocks(a, 13, 0x7AE59B31)); + + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); + assertEqualString("ZIP 6.3 (lzma)", archive_format_name(a)); + assertEqualString("vimrc", archive_entry_pathname(ae)); + assertEqualIntA(a, 0, extract_one_using_blocks(a, 14, 0xBA8E3BAA)); + + assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_close(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_free(a)); +} + + +DEFINE_TEST(test_read_format_zip_bzip2_one_file) +{ + const char *refname = "test_read_format_zip_bzip2.zipx"; + struct archive *a; + struct archive_entry *ae; + + extract_reference_file(refname); + + assert((a 
= archive_read_new()) != NULL); + assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_zip(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_open_filename(a, refname, 37)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); + assertEqualString("ZIP 4.6 (bzip)", archive_format_name(a)); + assertEqualString("vimrc", archive_entry_pathname(ae)); + assertEqualIntA(a, 0, extract_one(a, ae, 0xBA8E3BAA)); + assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_close(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_free(a)); +} + +DEFINE_TEST(test_read_format_zip_bzip2_one_file_blockread) +{ + const char *refname = "test_read_format_zip_bzip2.zipx"; + struct archive *a; + struct archive_entry *ae; + + extract_reference_file(refname); + + assert((a = archive_read_new()) != NULL); + assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_zip(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_open_filename(a, refname, 37)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); + assertEqualString("ZIP 4.6 (bzip)", archive_format_name(a)); + assertEqualString("vimrc", archive_entry_pathname(ae)); + assertEqualIntA(a, 0, extract_one_using_blocks(a, 13, 0xBA8E3BAA)); + assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_close(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_free(a)); +} + +DEFINE_TEST(test_read_format_zip_bzip2_multi) +{ + const char *refname = "test_read_format_zip_bzip2_multi.zipx"; + struct archive *a; + struct archive_entry *ae; + + extract_reference_file(refname); + + assert((a = archive_read_new()) != NULL); + assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_zip(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_open_filename(a, refname, 37)); + + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); + assertEqualString("ZIP 4.6 (bzip)", archive_format_name(a)); + 
assertEqualString("smartd.conf", archive_entry_pathname(ae)); + assertEqualIntA(a, 0, extract_one(a, ae, 0x8DD7379E)); + + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); + assertEqualString("ZIP 4.6 (bzip)", archive_format_name(a)); + assertEqualString("ts.conf", archive_entry_pathname(ae)); + assertEqualIntA(a, 0, extract_one(a, ae, 0x7AE59B31)); + + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); + assertEqualString("ZIP 4.6 (bzip)", archive_format_name(a)); + assertEqualString("vimrc", archive_entry_pathname(ae)); + assertEqualIntA(a, 0, extract_one(a, ae, 0xBA8E3BAA)); + + assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_close(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_free(a)); +} + +DEFINE_TEST(test_read_format_zip_bzip2_multi_blockread) +{ + const char *refname = "test_read_format_zip_bzip2_multi.zipx"; + struct archive *a; + struct archive_entry *ae; + + extract_reference_file(refname); + + assert((a = archive_read_new()) != NULL); + assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_zip(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_open_filename(a, refname, 37)); + + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); + assertEqualString("ZIP 4.6 (bzip)", archive_format_name(a)); + assertEqualString("smartd.conf", archive_entry_pathname(ae)); + assertEqualIntA(a, 0, extract_one_using_blocks(a, 12, 0x8DD7379E)); + + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); + assertEqualString("ZIP 4.6 (bzip)", archive_format_name(a)); + assertEqualString("ts.conf", archive_entry_pathname(ae)); + assertEqualIntA(a, 0, extract_one_using_blocks(a, 13, 0x7AE59B31)); + + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); + assertEqualString("ZIP 4.6 (bzip)", archive_format_name(a)); + assertEqualString("vimrc", archive_entry_pathname(ae)); + assertEqualIntA(a, 0, extract_one_using_blocks(a, 14, 
0xBA8E3BAA)); + + assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_close(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_free(a)); +} + +DEFINE_TEST(test_read_format_zip_xz_multi) +{ + const char *refname = "test_read_format_zip_xz_multi.zipx"; + struct archive *a; + struct archive_entry *ae; + + extract_reference_file(refname); + + assert((a = archive_read_new()) != NULL); + assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_zip(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_open_filename(a, refname, 37)); + + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); + assertEqualString("ZIP 2.0 (xz)", archive_format_name(a)); + assertEqualString("bash.bashrc", archive_entry_pathname(ae)); + assertEqualIntA(a, 0, extract_one(a, ae, 0xF751B8C9)); + + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); + assertEqualString("ZIP 2.0 (xz)", archive_format_name(a)); + assertEqualString("pacman.conf", archive_entry_pathname(ae)); + assertEqualIntA(a, 0, extract_one(a, ae, 0xB20B7F88)); + + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); + assertEqualString("ZIP 2.0 (xz)", archive_format_name(a)); + assertEqualString("profile", archive_entry_pathname(ae)); + assertEqualIntA(a, 0, extract_one(a, ae, 0x2329F054)); + + assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_close(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_free(a)); +} + +DEFINE_TEST(test_read_format_zip_xz_multi_blockread) +{ + const char *refname = "test_read_format_zip_xz_multi.zipx"; + struct archive *a; + struct archive_entry *ae; + + extract_reference_file(refname); + + assert((a = archive_read_new()) != NULL); + assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_zip(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_open_filename(a, refname, 37)); + + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, 
&ae)); + assertEqualString("ZIP 2.0 (xz)", archive_format_name(a)); + assertEqualString("bash.bashrc", archive_entry_pathname(ae)); + assertEqualIntA(a, 0, extract_one_using_blocks(a, 12, 0xF751B8C9)); + + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); + assertEqualString("ZIP 2.0 (xz)", archive_format_name(a)); + assertEqualString("pacman.conf", archive_entry_pathname(ae)); + assertEqualIntA(a, 0, extract_one_using_blocks(a, 13, 0xB20B7F88)); + + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); + assertEqualString("ZIP 2.0 (xz)", archive_format_name(a)); + assertEqualString("profile", archive_entry_pathname(ae)); + assertEqualIntA(a, 0, extract_one_using_blocks(a, 14, 0x2329F054)); + + assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_close(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_free(a)); +} diff --git a/libarchive/test/test_read_format_zip_bzip2.zipx.uu b/libarchive/test/test_read_format_zip_bzip2.zipx.uu new file mode 100644 index 00000000000..6d9884aeddf --- /dev/null +++ b/libarchive/test/test_read_format_zip_bzip2.zipx.uu @@ -0,0 +1,19 @@ +begin 644 bzip2.zipx +M4$L#!"X#```,`#TQD4VJ.XZZ-`(``)`#```%````=FEME,ID:&U-'E`T&C1IIIZ +M@8&J>$9":`IZ@``'J:/2&#```````!@`2FB$T:31D9"F@8FC0`\H^:+Y;81F +M6OH?UN^\U&IFQOP9(.Z29 +MPY-_:/D\-$@RAM/2I>(FNV$2K!!&6I"Q+LXG0-YLG/4-JQE +M=%^\,6JL1H*""1!1W]4\/.E))/*(3J)$L1_D*SPD +MAZY)I5-#7@)=-5H06@2PP-)YSB5D!.3J63\EJ*\X%RMDE]>>,6XG5O@<^@:T +MT671Y,.:/=ICJ(=@\88>!>M^W)$;,QY(*`F*%Q2YZ^%TF$V^/85:V;L\W53/ +M?9[-IX&M.O38C\;C@Q';$E9[,C'?<#304$$9O&4U<2"$^;O(0X:)KR2UT?=.E*K!>,09(4\>BYUO&*8;D4 +MY=51BK5L/;$+WB"S8Z-?)M+GT/]^;,7$S'N0)))KF]$Z(GL[.L=F*G#!4NC) +M4)I$P%W)%.%"0YPC!DA02P$"/P,N`P``#``],9%-JCN.NC0"``"0`P``!0`D +M`````````""`I($`````=FEM-]>-Q`H``"L:```+````H'J-##(C0U&1)Y*>F +MF4:'E-&0:!H``-``)30B!0]3TFDVD])ZF(::`!Z@`T9#30,)"0@FBGD*9-IZ +MIZGE,F@9``-``&0'`````#````````)$00FC1,(TE/T:,HTVJ?J3/5-#,4]3 +MU#TADVAYI!A\^>RTQ(BD3$3I$[^,/; 
+MLS`?JD6R.G5Z9H!*U7PV]92S^P:OR\HL0A0_3>$BI8)(/R)[BOOLC2%8YJD( +MA.HIB.`H&#CG`1Q7XZ?+[_/^7_[H%_86(A9>\@A,_,XZ=,XL5W'7 +MHP4M+H9TD.>'-*H@_0+C17D%)P0)RW]IX%[$"C`LP1H=G0_&E!1":MS%AMXQ +M.;)R8XI%.5\07+:U'.MSO0WA +M_'JHZ5GH`I2IS+,0&673'.$+E#O<4'!QIOJ(=A]MS$!DF&L:[UWD`N@'J&ON +MK"9F:A.4,3W/HWR,FFXKDW_PF?19FN: +M8JZ13*&(2TCX35$",$])GPS2BLBY&_H+ZVL;>UD^#7W[$P11S-:X+;I&4P_2 +MY2FF=^Q>)*S'RAW&B*<0LART09,I)(!+E@9";HI&D+G>(HXB:0(%/23%!+S@ +M:NR/5*I%\9CK3:PVI^LQ +MG?A&-RQT6VZ[1QPG2Y9UVU<7W0N_@W]>`:ZV*-IMC1AC'"5PL)0>9PPF\/OD +MXXHMUZ\%[^LVOK%3L]),1F)*MOQ5)H +MBOICX8!01L:?$[E<$R]4`!$&F!"N^Z]2A2JH&\;#/"R\MG<`!XM)L2.#%FT@ +MI$"[T#%/63SY#K=LX#RSQ*%:^J%L1'HN2F*:.2LQ2X?"^Y^<9@U@PV(.4E%C +MNS^(@N"/+X8L:9,0T^E]7FZXBP#;&@K7#\.W[MOUG8-`-,<'O._DJ$2@7*?N +M#ZO8W/#Q7.=4KXGTY)7:1_NJ7-3FF\<6*$<^N]+-!C)W4+U60F$@>:,&%)NE +M6U9J0*F"F4!.R,Y`.G*"*P_*(P>-<7WV6=I7*E]98+HG":L0P4Y,?%RY^X[* +MDSY./:P;H^X)PU&L=#"2/IS$=1]X>MB:/!;68(&.EX`-X^"ZW_F;B,/8(F45 +M`_?U2XOBQ%&$HQ1YCTEC[_A42U:75Z>4-MI3.*/X>72/`C"+5Z>0B4C49O3A +M'N8'1IOJC95M$"-@1RC6(E"P.0[ZZ:;J?E0+#0?LP-6OA,=$^`?$]R)HW%K-',>W@ZBCI&CX:1\Q,YOY&7QLLV_J,<-^S+:AAO&'.A;*V +M3AQROQ<,BPB$?,M'!#1QR@/4P#TM5"<*D.$6U5>6NB0:@2#RD1AD +M(26E6M&U7;I&M?><,-FD48N@:,:2[U9AD#8DO`F8G`NH2VG:1>/.?`E4V(J^ +M#5)1$;.G/E?/U7S,RZJL\[5*#)Y+2^&5-1.L.G9B!:4)5`06C6IFT)6E9!%+ +M2NC400RE=1Y`*MU7P;V=>_94Q:13C>/8$;NI99OUE6QCKTH&-VKEJ+?SICS8UQ>&55 +MS!AN47T#J*?QEQ6,M2+`@9;Z#.*GM;18!J.<)J@Q;$1#.I*.KS4[,-,#$>-\ +M)PO&"I/*D7`.K'08;Q%TA8:$`G^HT8=;C3N5_4;IF)[F0N$3TDH*Q0-P<[42 +M,33G53,R)8(253>'`I(/(`C4Y$P=LZC+Z#,LC!<`8.=$0&& +MHL)>D\@AT(1A;6.-(URT`6J-@0P%Y020G3VP:QRCZPM[@QB;&[&]I'1L3HG6 +MHMV_<`9H[#0AJH/K\46RSPQC,;0Z:L!U8.MI-D=NTHM!=_;/K:@AM+BTH-:,C49$_(R)IC&^1Y +M[EI=1!L8O8:A+.F,:91'!%LGX.Y*[9W'8Q>2DN$BIBQ(Y()FJNVR"%T-;E6?@6^'&XMHR)AAWR$C9972+M#HSN- +M);+!>$HVAW9>(FP:U,1BUS180R6S:<$UD99OL@!``!4`P``!P```'1S+F-O;F9" +M6F@Y,4%9)E-9[2!3B0``35^``!1(A>=")0$"`+__W^!``=V;:TML-3U*>D]I +M3`F]4]-(T9`T:&@T8I`F(U'ZIZ@!H`>IZ&`````P``"4T4R9-3U`$T]$`,0: 
+M9KMEA9T)[OUF#<]_H_:R!%VW(Z5<8GI,92Q$\@_99'.V;0!9M_JQW(Q83@(> +MU(&RK]97O.4,(?9Q\O->C6^K@,2&/*Q^ +MQ&&;:95MR[%C&6KSOTJXV':CWZ0#H?FO+]L:K$L-@\J??66K"[C''`Q(05A5 +MAA`H<]68]Z8WD3,92%/*1-[+TH^&FHBX0;F(=TXT`&\A85\I0".^@K=W'6BV +M6W7I=FG(:-GP/>J!T.)K9&E&`70E\*O7<@B"XC8N],ESUC>J<#0$&LF`$Q"I +MPFSXM(AMXP]!B.B$A)+*A@FQI7N,888C3<<1)$V8APW#69B,^)R&)J6R?!9U +MVQ?F1G9TTEH&P"PV"U$F)!()!JGQ4/1670D)VLF!P-;I.\BHY"F((+!C$AZJ +MZ4L(AR$T$'4N]0NK3DSCOKGQ@9(O?$J:V8>5Q'*QHI5Q[SIJ>$D/XNY(IPH2 +M':0*<2!02P,$+@,```P`/3&13:H[CKHT`@``D`,```4```!V:6UR8T)::#DQ +M05DF4UGG",&2``!,7X``$'3GX%(B(Y<`O__?X4`",3M`&&IZ4RF1H;4T>4#0 +M:-&FFGJ!@:IX1D)H"GJ```>IH](8,```````&`!*:(31I-&1D*:!B:-`#RCY +MHOEMA&9:^A_6YQ\4L6YH5-&(7`A#+:]%S/&IQ$[W/8H;ZFA%01)[[S4:F;&_ +M!D@[I)G#ES0BEILMLL482*!*U'E=+=]T@I+>3,SRC1"`R6N_&W"N!*6`ETU6A!:!+#`TGG.)60$Y.I9/R6HKS@7*V27UYXQ;B=6 +M^!SZ!K319='DPYH]VF.HAV#QAAX%ZW[ +M2:!4(E-IS87%O<(M4.)02QAK:[^IR!Q:E(+-CHU\FTN?0_WYLQ<3,>Y`DDFN;T3HB>SLZQV8J +M<,%2Z,E0FD3`7-]>-Q`H``"L: +M```+`"0`````````(("D@0````!S;6%R=&0N8V]N9@H`(````````0`8`(#N +M7F'.E=0!`,YV<T*``!T)82Q1PWAL +M+U`,N0L_$]^&650C/X$D6#4QFD$\A/"_![4!O/5O/!KH`WCQ*4?T2*]4P#/D +M0'9I?EZG=N69Z0V;H0I=CP*$?".I\ +MGMG/80.A'^W>R4J'S/CZ%P`8`>F=R>R&R$2T@EM#X)"OQH1?A7,`:4IU9WV! 
+M#2W*DXT',;.4YIN4A:-X)O=IREL201ZSOC=YSAU[C4-::/YV8\)%"L17+>VC +M%/'B]ZCQN$2(Q*9*\KJZ`Y131`]5C&G';@1S-QES_RZF!2OX45@58+??ES%( +MUJ<(\`11M$NO)HK#/MK-9RT"15.2I:IZN8VTM1_?$G\L#BH67]$S%[4 +M%C-$\Q<+./&HV](4,7)OL-@C^M0F"2O!0N$OHOW54H87^QLBQVH*D%A<#SI% +M/#+-5U(W';:KC)RE>0Y^5YI!RECQNR"R4.UW9IR!@:B!UB8?_D5$FT8YCJHJ +M2[2"-&-_D2BJ6#XK[6G=%K"%;'^-+0]FHCY4ER#`^-]>-A`H``"L:```+````?C+)VT(T,`U9;I1/'N)!6C.3M-O9LO]H<^=OZF\ +MXD\QEZIMP!JM4)W+(F]%N;U#&+F]WZ%S9>>]&^75:X)0Q-5>)IRT/%INC887 +ML0UF,PS8J@3;WGH95EW4\KBH,6ZK9,'![3$-(WL/C<[0.6%L9@]0;]_>&[A'#Q^##`6+*PM@YT+]E[B9 +M;QK38:)CO+S]4X2KF'=)BVO-GM+_#]PB0'*J89+$Q11IG\\+84M](58^RW.E +MGM3S6RH]#73P.KV0V,2=,/A!;G>2Y4R!!.4(0U+D<.'5V*%./ZZYD")=W4*4 +M\SL"IUJLN])WM`-&4L&!9&U_-1%%SPU0N4*.^L5&33"A5MZ;@8F+I^D7JB'K +ME&,P>"UI]NT;ZX`UU->]V$?KZV(DX&-6L>? +ME,(J<;DNA6/2.<9$>?-P]_!S8L3/2P@[C'H$FOA$WUOB>;X$)E7;K+7#X9M( +MM+H^AK#CML0LXTO#P#I)J_5"@O +MMGH9+OXD^P7AG@<%QGH<>FIWE]/>L)6Q2Q\CC=[?TCG262"1EO)'_W_C_K4< +M8.7YNR"3\9WTQ"0-!>^,M>WT.Y0A&3AK.4/YN5EW]"&HQ1=8Q-NHQABVH7$% +M24Q:J/<8%F1EI)-!HND&FP_;]E,JI$*AGE&]Y#FJ5R6<(UM;;>U?KU@OQ,M_ +MI/+:;0^0'8JA"==H5/;]Y10P&PS,1_M+D1I>`%>_Y\&F[_R/##784#0AQ1!O +M'8>IBY?;&;(VAOI$+Z[VMI>]WC!['W$&Z5/D+"6["[[.[P!/?9C/;CD66^J/OC) +MO0JXP'Y6]PGW8N@X(#-XXWMP"A%E$A8L+X>QIC[0P,UT0A!IYQ/#+>E60KX/ +M2JC'F-Z@_,-/$7+F5F$1\#^"CTSY;H2EAE,/8>F$^'5H!M2@D?2O@+NJ2&-O +M6IV-[<6F-JG0(,W-YJ%PJ<7T.F")'/+F_`F6B-HG436 +MJM-VK0?>9*R?K#]OHC90!]BCG'^D&%!JZ2T_&[RWV?0Z?T:#_\B`?:MRC$D5 +M1Z.)!/;3/=GT*T;DC/XLR;7S%VEP+U-M)-*V,1?M<377S9-`J]S4[OI4SR>V +M_?_H2WWH>\@*$V@`LZO?-'Z[4_4"T2R@=%! 
+M!\1]:4M((1B=ATAJ)23HE7>]=,7I#SK5'2OUZLX0LX*_67!+6S:*0UL)^GS6 +MM;$2!(-TY%4ME1>*'M)S'[0JP^&J%"-&A[C1YE;1:X_4Y&K9*2 +M'14NC*]O(JXNLX#M_(LDE6$G<0,+V/;44]-*^S%7G*.\$W&>2=9,EYB)/@XK +MX+E4?>NIP=*X]Z>3T,V?!"5:/M@X-H%DZEV,*9WIK;T_C18UC5$%^Z5PY8;USNB?S]`D-K2V2 +M-C#0-45C'YFF>#*$9JZ!$>%0;=VQ@LGY"J<.C.10"1'JL^B(VXPNK^]Q97E\`HX0MG2)F=>38R,IFGK\R +MVH!.V3_)_D,II8A=SL;X1!2D9E67=.$P+_D6&'X^1;4%:`ERV>!,''7<#=NE +MP')[\,A89G35\;ZN:IIY5DDSB$(8D$[G>7-L!(+(-7=LQ0Q3=S`L-)R9YT;S +M;%ZYVD%6)M=I]*QS>!3='"SB'E/4>J#EQ.&]0*U0H/Y1%DM].)E/UFD0?Q)B;_;;)-K_]6Z<)D_>^[=?49%4D4B@REC` +M,_KU,KX-N,44CL/:*^D>7X(;6_1&C;<;&`K3+4NY[=4%K2K:U"Q%T9)',7," +M+PC``PBBDVNY9V-LN'V')B4@4;OH(\/M%UBX$S."4_!;&=9W\L`!2O\!ANM5 +M8+""#5J&/M[2;M?47WN=0[4C*$7D#QB&`!*@5_0 +M9(5SZ7,Q"N?0W!\0WGQF,0JKH[G-(<6^PMJZV&VHQIT?VKJ^7^!EW?)`EC`$ +M0%,=?X0ME@T_&5UXJIJ9)5(<[=W]M#Y.$.96:_T8WM/H+"T_Q;QZN"`5DC&' +M.+(^")0XO=#&#@KVL2R9+.#F)P02=`6PT[21&>T*Q3Y2WVL><:PHB)R?*'+3 +M+,OK(7PNXHXMVFT"!)??8!H^I,&V +M8BOE(Z/VYY(5WLSG$.+8$;.A>]+PVB`WV,9M@YL='Q_<%2STU?_IG.I17G^L +M>#-=')K6T?C[^&,7Z)0!B[2-)2:&?*5, +M,;&-HI_/O(/V"4-:_E>0Z)=C=1/9HQJO$R:(&H@4RP"3%T[BOBZ`3_2>)K49 +M\ZL-26)MQ\%(>T0S5A4>EUCU[),JI"_/9<5)*6X\97`N:8YSR0%)@*#_'9'? +M/E=RFK"L&(#RS$S_0[?/!DW.`\FH$M\#"JSQ@*&C*W\`4$L#!#\#```.`(@X +MD4TQF^5ZO0$``%0#```'````=',N8V]N9A`"!0!=`"`````1B`2H,L'S88)4 +M*WW.8!PX^&C9J^P?<4.RSJ^WF7L-W#+OB4@(A-83?&.PVF@$.:VM<4\8K(_6 +M`CRI/@MR:,)C8$&U*MZKA\W'KCI%E2GDYYE/SB'*\S@5]NO,ZBF!;HY@ +M3Q=F"]@P02:6^LC>JT$T7BB(.:*G`I.?%/53CC#4I0N_ARFXFG`DZSI*$)UM +M1^X:IZ^^)T*5E,C6L<,L%=ASF-1:0CP6*FX_;?R;#]C:/ +M)C@@M]\FLU4*%H^>8VW4@@%D0U!02P,$/P,```X`/3&13:H[CKHY`@``D`,` +M``4```!V:6UR8Q`"!0!=`"`````1"`2HPGR1L@-2P5SX3!72_Z.U"NE"C>=! 
+MB(;_TY1PR.H(G3<^QXEA+%'#>&PO4`RY"S\3WX995",_@218-3&:03R$\+\' +MM0&\]6\\&N@#>/$I1_1(KU3`,^1`=FE^7J=VY9GI#9NA"EV-P&-RI:_YA-3= +M!E?#E$05TTLL[G6$<%[`H1\(ZGR>V<]A`Z$?[=[)2H?,^/H7`!@!Z9W)[(;( +M1+2"6T/@D*_&A%^%+WJ/&X1(C$IDKRNKH#E%-$#U6,:<=N +M!',W&7/_+J8%*_A16!5@M]^7,4C6IPCP!%&T2Z\FBL,^VLUG+0)%4Y*EJGJY +MAS2K)[;2U']\2?RP.*A9?T3,7M06,T3S%PLX\:C;TA0Q; +MZC0FI6JUP3:]D^3L->OKP>:G2P_FF8)V>\LVQ^;MC'-2N-,GXD"Z]PZI8A79+7ED*DB,K&N'OH,DT/C4$L!`C\# +M/P,```X`D3B139XWUXV$"@``*QH```L`)``````````@@*2!`````'-M87)T +M9"YC;VYF"@`@```````!`!@`@.Y>8EJ>3<8@F_*<(\B>K]4_(WC8#)`_QSG+`7`B&_11VIJ)@#(K)_I8R^=`VUO2_S,1C=1CAU>-*`]CC+&6Q;EE'#CG-W=^[,F,+UR-TE(9,G +M1_&$NYD`,5WY5QX7@%5XX-*7+7&#W'1#XF+"?6*U!H=B063.]NTA`1+&\J(K +M&U]DE8$ZYUU'UT\$!88I,6+(M/"=_+3@V5H+24 +MBA0>^0?[W07K"B%SQ&OA-R)S9W>DH,<4##E@?5YF:%K$?5U@W[IVG+2#8FC4'GT3-$L%_N,B$^)UY1GAD=.Z(HI#3[T"979&<`+ +M`S]9G+PI';5"==`18UE,>N"?>"_C"MSHX<[!&8$+A6U_7][TK*6.^\-O-UK! +M`)>[*.D1:1H!I,?PEIPW,NQ5CCX2NCY+%UH"T5X!O$&=6+X#"()33:]FPF:F +ME-O)\652#KD$^ZFBJ7K`55]R/A"OA"T7\R6K%B(2&6*/H0>*-:@< +MJ2V9^KJSL+P4A&$`4$L!`C\#/P,``&(`/3&13:H[CKK8`0``D`,```4`)``` +M```````@@*2!`````'9I;7)C"@`@```````!`!@``)=1P<:5U`&`PJ3,QI74 +?`0"74<'&E=0!4$L%!@`````!``$`5P```/L!```````` +` +end diff --git a/libarchive/test/test_read_format_zip_ppmd8_multi.zipx.uu b/libarchive/test/test_read_format_zip_ppmd8_multi.zipx.uu new file mode 100644 index 00000000000..6fadcea6493 --- /dev/null +++ b/libarchive/test/test_read_format_zip_ppmd8_multi.zipx.uu @@ -0,0 +1,84 @@ +begin 644 test_read_format_zip_ppmd8_multi.zipx +M4$L#!#\#``!B`)$XD4V>-]>-.0D``"L:```+````X%[[99#N5_N2Y@T-?"T%*8O@,-K>QSOR>-).,0-Q:=7)+:*Z&:B9' +M-YXDKOWK"T?"(.;F$3'A>TW+.W#W%Y0`:_5XI+35&4_L<0>95Q!XP,\T=VCM +MZB%S4G,4ED*-4T#5Y'BUCMS!0Z)0*50U=EQWM_F]2)<8#VF(432?^.!'O`I/ +MLP!A8AN1>8;MQ:S?/4-$K07X@["K?T]0<=84:OJ]Y$.D)3MNMK`U.(ORV3M& +MV`2EDVHT&D'F-=\,_"^;UGWW!=,KJI4^NZAF72$S=N.*PW!#ZJ?KQ0;$7E!] 
+MFC,:ST/]Y(,:K3SK7,%&A_%1`($9H@/\G.RN$J7BA?C\P"[U&[8MITK*`U5E +MVH'&CJ8?%1+H-H0=B*8]_(F;;/P`371(@?QN2T<4C52L;%S-XV),E4-V05&I +MA.V?U+HM63;JH=1]LJ_E"@JM?QH\6#NQ7I\I*%-!)LWB?U):O^--2R=1C):^ +M^@GFC0GETH%>LXD8_D*J'KS5[/M5+89M;6@3?]I.`M=;(S7*1?(?.9G!`S26 +M([JDK/(RCM%WYC`>G*L7:NJ!5TS[7MI^E[4/\RU9$&1XJ5#V;<'F^AX^R@P7< +M/2?UB;-2+I11]^X;,J\:,'^R=&8"@[EI0$J:":*LQ-F(IOSXC?1W>/;?4$P* +M;=V"OEY@S\%3'&F-^K0FME.]%1DTO*OP8.:M('3W)\S"1.@8# +M6Y1&@L+#!YVL]8.,[UP/\HMAR1"O!M/2?DY-8[N'P[T&2J7-V5%=XWMRQ,EM +M7TL^99!FXY0+B1^7K36"#!85"WXE,WFEU=B!+GX2$D<)MW>&A,8A9RPL]CR( +M!=1JX99-:5D;PM@S4(>?BKTFAQ>"%9\Q^Y\6T\ +MTS"*[M.TW7AE:#\(:M_JLJ#*-9-O,UW\T:PVW8G01OR!B59P=[:XNH0Y=OC9`KJ_%.>^)RW-,G^%%(U9HC957G/\@ +MW1\J*,JLX*CF*,-!'^O*E7=V#S8_#G`9YQ)#%,AJ8"-4B,OR8RW4@L)8F$JB +M.3QD;;M_L<:*T3B>8\OECY@*'6?[2VYXB3B!`<:65*HB/.^@;]\`IE)HTIG=_X:*3A'#M\P0L*]^ZFWC\UL`O;WL@#?"!(_]OZ4=B<\QL/ +MWD%/TL5[P\,^_3G\]WD-Q`?XU]K5LFW +MI6J$G\=W(Q+2_U&THGA'_6)`"WK=X74-OD_B%WJU+JH/A"K#JQ@)A<`>A6[H +MC2A>S7]ZV/H?/1KV&OR8B]IS\>Y:\5;B-:+E^V`:5_O[+(!,6LW0&[5(F4UH +MKM;R]_V\?PI6I+"7M1H%E':WL<1#J"O`>]S*UKS02ME,S7]X66@[B`_#?H$^) +MT&07IY[)18RK+`@#?UXT=8B=XC$J'SSJ>XP2RJ]>02[1J:R;0"M&L>KY(3*9QLH1N@K?LL%3J%O? +MR!>+JHS'L3_<*?;*81D1'/A-A"%%P..0%#Q32?L88\C'/'W?D +MG766\,!14W_TY,.;G,\C.+L1LR__7UR*_H!IE,1LZO,_+#?C[OW4+[4I`[7] +MZ;U%`5#@;6A5#JU]H+\E;^^LQBHQ$-T.P5.V/JS69'PI06.%#D4 +M%B.K!F(.G*6@/>[P-FU3-#%N.EY%W*]P_]<>?07:7 +MK&JYBO\0HXN^[/OA#_MP/QQG-+1?Z)7*WRL14*DMWL2795*UHMN/'POIN@Z= +M&&>Z/LT#HX00EWLZ%/G7)X<":!ST&P+Q0R)=V6!#D]TY1RYO!C&5&''Y<9UK +MK=LY46W;=`;(^L<>RMMG%(;??BF+AY,7=+H)6XSZ:2#^`W=RR7KIE>XQ/[_^ +M^$F4*967)DE3/^&\(7W?:EKRJ,PN@HOT:U_.!1-"7A-=LX\Y.=-37_355)/2H +M5V[LT(83$V*>?^YW7'6.SD=MYSM<`[M'IU]?H2ZU8\SLD+R-Q__8G +M2A&2#H5-J+*NQHQ&ZX$YO_(.OC\W[3B6WB#"MKV?*/<=(JBTKB`D_X'X+7"Q +MJ$B]N)L:OTXC':GK/!QYSR.,%%T3XI6(C(3&XZV`.ZLS=\O7>0 +M#N;$J[W)(Q.5<_I5G1K^FB10(.'.#Z!,B8Y'WX_=XOM8\+X/;Z\!"0HTWZG# +M[:0<%(2)`J>6>9=.B5&CWX.,@3RF>FN>'-0))N"!IY,\_?'B`DNYJ4'+!JK] +M\NX-@,,Y%)>#36Z7>CBBL=0S<@W(QRXPD3:VF-W)CS52HO!/T%NV[$0^B01. 
+MZH\`/9ZTF=$,",`0J\ANSPF&X.0O!?$8\^M%XSM$`D)W".C60(#6;('?*'.` +MTN9>^YI5M0P:-7MCSU2HCCU'`[!+^"9P9BH%1<=K_B#UZ3CDJWX+V\C^S$GD +M]M3M2VJY4''"]RN@>]D^K6B7ZGQG!<4'LZCK,BU]QH,1G(P$AB-W/G%7[.`! +M"P[DH--W#`"^#];7=H09``!02P,$/P,``&(`B#B133&;Y7IM`0``5`,```<` +M``!TCE(/-F<.B*/H_C$3G..AVGS/6(LHZG/Y[UI +MO1E2,[]ZU6`C\+WOIWU\>8)Z?4>]`A@"D*I3B52YGN&M&ME*-/]I!1,P.R@8#3-@,D#_'.`57C@TIL*(7/$:^$W(G-G=Z2@QQ0,.6!]7F9H +M6MRJ>G_FVK['10^FM,>V7UX1]76#?NG:?1,T2P7^XR(3XG7E&>& +M1T[HBBD-/O0)E=D9P`L#/UFL!57W(^$*^$+1?S):L6(A(9 +M8H^A!XHUJ!S!KSA%4.]<`XB*.5^_Q`%$6ON%MH"W>Z9/P^MG.I)@N0WI*EE+ +MX2HV+?E8)N&["2\:UYRI+9GZNK.PO!2$80!02P$"/P,_`P``8@"1.)%-GC?7 +MC3D)```K&@``"P`D`````````""`I($`````8FT!``!4`P``!P`D`````````""`I(%B"0``=',N8V]N9@H`(``````` +M`0`8``#P/%?.E=0!`,YV<5U`$`-+O[QI74`5!+!08``````P`#``T!``#O#``` +"```` +` +end diff --git a/libarchive/test/test_read_format_zip_xz_multi.zipx.uu b/libarchive/test/test_read_format_zip_xz_multi.zipx.uu new file mode 100644 index 00000000000..cbb0434691a --- /dev/null +++ b/libarchive/test/test_read_format_zip_xz_multi.zipx.uu @@ -0,0 +1,125 @@ +begin 644 test_read_format_zip_xz_multi.zipx +M4$L#!!0```!?`$JCP4K)N%'WP`0``,4(```+````8F%S:"YB87-H%H<2H0"6A +MIYB7=GGPH!_8+Q:$I09*"\)&%HUP2'X^1_;,\T5=!'^A#5>U*#)2^Z>L/*E5 +M%Y=SZRY&`Q_\(3_!//$I\E.SDZXL)WCSE7*0<#ZW/W*WM?^3CV,,)YEK01]T +MT,<7Y)@5O0':C!S_0^WG/9:,_0!\NG>*V2&:&Y6HU["3TUJ[%2F5'QVDJ49T +MXGT,!*J&0,&"B11L_MOP3I7+N6GR9P(1N,#/>TI'$'P9K@^7G77Y&\%&7G#U +M.(1KPE@G:PEDVEKT='HZIP\2CZXN#7@$BKO\_/`A&S$A2;^3%0VJ0B<>6Z7* +M'(W4;J5SB-RW4&#Z`/N2.@8_6$ +M@.Y1A%M"F_0Y9)K*3PGD$TRK,@"9C'8J9SEZHM`Y$LI/?PG%_42+$3TP@+I` +M+:FR6X1[JU^A\'RX,7__;W>6RC#^PQ8?M\"W+\9N+5D6E<,I6YVTKS#\$JVA +MZ;K0DY^(!*H9_',HH[YJO:)!+$6'J^.N'A`#`W;24DHV[N/$M_I"<*S-?4"H#MX!C`%KOF),."`K?#U%$R3O$^-3L$B +M4!;(W2EW.X`!YA8N;NJ=9MQ+0L:V/,NE<$C'^^VK:'5:ISTE3397ZF]A-RNV +M_0['T/F3^RM,C)6LKU.-W*KX4K%_0E/L1N*_EGPJ.$1^?N[5CQ%1QVWW7=PQ +MRBO*[C$);8)1VXE3/D#0,<6UN<]Z2^" +MDK"/$_T4Y*0'L%^0ZU9V*83*/A7Q58HO'/N:&4-[4C;Z"VG4F*YB&&-DUJDN>P.WW_W/(C8EWN-?5&H` 
+ME(KXMK!7NU+"*$'9`APUBFPA)L:VI),V)&P9=E5XX@LEL0`6F[B]V<#9XN[& +MRCL*9L(.?C[(@XE%VY>?B<'!&6!:!_F-/_H10MNL_AK01%_7!^2_ +MK-;P"DF0A;4'A4:'>AL55U%9:8*0`4^?W8+9DT"V0RD8LG/39(VE@V_M&V+K +MAMS@CU$V!O(Z>CG.!!.(;UXV'W%\:.5'^#G(S+5'+V!>C07US+?DIUL/62Z: +M&B4VHH.FA1S9(^$W)M3*NTHC:"FCSF;3$8P4Z@>L4)!K1Y*\<:F0-]$9L=HB +MY)E#TDD5?I!S4M5OOCCC'R/!D.`9%'K&$F[4E5VH"YP$-,%9!F'3#+:9192$ +M+UB^3S-M%M@;228EH`````````&9"<41``!4)ENHJ``*_`(``````%E:4$L# +M!!0```!?`$DZ/4N(?PNR0`4```8*```+````<&%C;6%N+F-O;F;]-WI86@`` +M`/\2V4$"`"$!`````#[C.DH'JHS8W0 +M#?$V8!+]F!+4KI<$@5HR#OF&_!+?L]N#167^N/Z\RR?GT6KQ]?CNM8>==$!O +M=?@QVU.*S@O@?_!ZYES=OXHQYJ$K_DR%MMLC^2[U):T3="8KH6E\^#TM[(#:A4]+,_]N-'V@&OOT`,J)*`1$6H.A\AQ<:=18H%@ +ML;39WZ/[&[?/?SG0:`)Z@@QK2DRGY04F&/;-K*Z2%^"N_BP_F6O6MO(I/U.T +M,L^0$-SQ!\_E:AA7X(VUV.Y`_]<]=`A6(D?^?S$N7OB; +MT(0U)^NL7%.Y4Y=[UNW8JWFUU2Z%N5G/Z:2V?%%%6/';]$%?%<3`>;=\3"TZ +MOQH>DR`T^1`8>\&>-+PCS8;)$M]WJ2O3&-M5NR) +M$WG>P*D3/:ZM,!QAAXTJ^?^DMH4:%0+):![`U`''+%K.;9,DM-OWB^)49!83 +MO-XVJ-N?)]_]87HXQ6(V0%4!R+DSR&(F=2*,KFTT2V$[`UZVVMLSI3T^B.%" +M3D->'YYCN2Z9EC%"/S^=W%139%7:"`%-3":\73B.TK]C)8:O)4=W6=.WU7J2 +M@Y#"B90[N&9_-Z@-5-#!_EI#N]D^NX!(OD[^I4G/L<]:Y&KD/P[5G?Q)6SR. +MDWT*.9G^KK"3^&1]@*+P6$#\Z4SCR,)E-_G'AVC:U4C%35O[,9B\$>QS9A// +M4-6)6TL?R;D":$KYVE+4" +M&J><3=ZH:XM&"9ZG5-4HEZVM9YO'8X@P#G1HSFI) +M2W.U-!_N1K4*.N#AAUV?:`+5)2Q\ST04<\TLD-92B`V7$2.54`>G]G_GE!`] +M_IBNF"]3MS#G]1S*N!MZE$UQX#1ICZMB[T'>_#A&8:3CP,B^UU\D$[3QNZ\P +M=]=4'BLWD`?H@Z@Z1;^A03I/VJ2$9]*>-&-,(=@TI22.6-'S#KS*+AT&AY64ZN7$Q'B,&TN<$OCO2W@';2/5 +MEKGSV4@OF10@NP4EBO7\Z_Z/ET#+&R::*OH_-<8/2\VF!^Y["EA#=>3&>K=? +M3^N#$@0V@=Y#K\T!ZU-B?E02#N6U>="G!A6&D?[?!.55MDR7@#`#^[T2I%,A +M+@+0O`/8^4,FN!XRSE;S@C93NICEP.5CEB_787X1B`&3L2FW@E,@?%HW,)36 +M&;CMA--B'7O,LRD\RJ3&]S+/5/3#/?F.Y/5_]51?5DUT3'R5UY.+M +M3HA.NI":EB`FI,"L@%9/?/J.E'FE+AIF8,A&N_G:M,4%DNJM5&/OK\3N="=5 +MYWM9VORAD0```9P*AA0```_X7#"H``K\`@``````65I02P,$%````%\`2J/! 
+M2E3P*2/$"0``R1D```<```!P`!&(!PA/3V#P('SF^\W3.1N]J/".J]^]]U@ZBN`&LJSNNZ7/6,;7A:'$J$`EH:>8EW9Y\*`?V"\6A*4&2@O" +M1A:-<$A^/D?VS/-%701_H0U7M2@R4ONGK#RI51>7<^LN1@,?_"$_P3SQ*?)3 +MLY.N+"=X\Y5RD'`^MS]RM[7_DX]C#">9:T$?=-#'%^28%;T!VHP<_T/MYSV6 +MC/T`?+IWBMDAFAN5J->PD]-:NQ4IE1\=I*E&=.)]#`2JAD#!@HD4;/[;\$Z5 +MR[EI\F<"$;C`SWM*1Q!\&:X/EYUU^1O!1EYP]3B$:\)8)VL)9-I:]'1Z.J-8JFWEH.CT0J&T-]!<(2-ZX3@)RN\J +MD0J4H/6N:=KFT$;\&+`]@#Q%F@SF+-%(65*CB7!$HAZK&W*TS\4$TS&_-!XUFUI$7*A-;',>)+^5/P,ZD`[HWR.A9G^#C +M-6KTNK(S\@+HL$6:2FA5[-R2LQ[3WK4@G[G^^#%XET%6G82[QS;7(S,^%>P. +MYKA-5S)0G[)N+EKS^1K@1W;SU#4':SY-Q*(HA+6WR\"M,7!$$QQVKNVE\P[0 +MBBWM8'\!):GY@^BZZ/>2OU-EQ9/907=8"CMA,Y1-O?0VM$[ +M,QLUL^VK@(FD@-<=6)L`J(Q]$R2&TIS@=/S2CA0:)DA&H\H`$K>#+BZ#L5!& +M]8O1Y2#,@L6`*Z_D^1@3[G%I48$DK(:8@ZH;WXDD0/L?(I^Z'Y4Y+\D8XG`( +MSAG!!38.H:+L%5M[]Y^`-CTFJ#CX<7/IC$#M`L)E=Z68`.Z15.!\)150B=YZ +M<&XZF>5(BYL_(:&P;,V^D';WU2KR?A0+4-E:0%O1F9`\";RR09,+G:>77.C7 +M]KV`K:4D9*B)Q7-\>:1&]N#JUDJ(<;NGJG2;_\!+&29%3S'E+/YN\/.CP_`+ +M2C&6BOI"R]9V%EB^`F/J&*.2?`KS5D\7+CM*1],_W<7&0.MI\<*%EQY7Q@SQ +M]J35[E'P\:C"YF9--<<]IB4/#I9@9S3^CAVB3]UU"M=C#6^!]'`R!N<"BA3D +MN!$ISF;-DTGCY>18H,[#4UN),"U;=7J/F]7_C]K`/LRZ> +ML--U<[]+X!IV+&GCQ]@_E!=D2Y1SD_+@R,2:B,26E+GCGX,S56]3OA<@OBVL +MP`N1,^ZJXI1U"XKCE:I(CGLU!K\^_VU7+TJ2/;81PC?GHW(SPC^[!L2+DA`2 +MY\B!@>P$N?Z%:.G%!3DJDM;N*"DN*/0 +M.&`?D?G/MZ-/H^LA*3<3>4'L">#EY&"!H9TB4M/1;]:<*%/-*%*ZZ2,FF"T` +M1-VLYD+N +M@QH1*4'INB@E6X<9,1=\T'`K<_PIF!F*CV&59B&:2$-.E\%OM2!\T"\X#?W) +M_\E.2DS4TN&FVR:GZQ#7+A4-!3L*KI`(NX#-0JA_OH3'J@OJ/G'C\>/BB@8\ +MO9AR=;IRPX06AZX1'^R/RE>AE,'6#4](`V?FM0=BN]-3P7I*Y1N1VF>PL"IJ +MGF]0@PM>JH[PYGC*F^0"*N9OI-E(9C^LSD&NHWHEQ=_$X4]3I1L][E,)5C%OKU.]ZXIDSD]%)$8-'2>$@08,C&F")$0X!^:[H:2 +M77[$CGQ]J0)I&=%/9U>ZZ!2Y93+,3^<38JQEC-]%YBW9?`S,U]*_N)5*MF1; +M6\2$>DXDY!G'+D3!)U47?ELRTEI9>)&;2]..PL(AM-=O/69L[N:Y:FYU$4OV +M^,N@H"C'ZH#0&TGK?'HWAY=!0Y!^[6!S_P:>A)^@YZ;N82NGABF]X@0;_,-? 
+M>;?A^0=`'34QXH8VW08:$;&O]MLZDV-KC!/$@J-K$9[N0&GOS+2GI;?LN)L!M0MJD)X0D:^^ +M)C-S@Y6IMQ)#AE,[..:E&_PK.Y=^1&=;H[0VN#H>-LQAW3+GYKV6I=X6B3=8 +M.@Q5EULK;>80C>Q2*R&+D7DQ0E3,-2X+C]$9NN9R62RYTV[PC,4H>F[\5?A# +ML..XN!<+40H+7WGS&4<3H)3#?4%;XFXM&K'OQ*Z'L*"N<J@/Y%[H1GLJ4SH:YM^-V'+5CKLSP:+]J--=`-B,9Z+53^H2Q]->KE@3ZYY +M\OZ`-KB^'V-<@\MC.J++G-TJ>RS-M>)-3OU&NT5CLUM2@\\A+I;`GDR'T11U +MAW1H\ZECZZD*=!@DMO3,60R-`\W7_X6C,8FB3C-2V +M(@S*@2LK1T%!';G7&%\&D>=1/;=64G$92-FPMB(<+37:DLED52:_VFEY47*O +M3ZKVJ;/.\O<_2Z?IB'U9W2:6S6_,X$F=(#VZO.@V-4CY<68\B@A`UKG0$AR1 +MC3-VV!4W/,85QD39F&!67\SGM$KS:=TRN]$V"EC=+O7\PN8.A__Z)$@0>8,] +M!NHPA*?U=-A#U>OEK!#NG,RB2Y@@`5JNHJR0;9A'3FP'V/.UYMG+D*L*">.% +MKY&BW>NHE92\QT;V$D8_X^)[*WA-4GBAS.]:Q0"SGL>J*KNDX)76,LF["/13 +M[SM72F%\\//WRT1^2GJA553G>V*&$Z9ZK.*:"0K;06@IC_N`Z$&Z4?!2=`CN071_Z=#```````!G1/),P``5\:%:*@`"OP"```` +M``!96E!+`0(4`!0```!?`$JCP4K)N%'WP`0``,4(```+`"0```````$`(``` +M``````!B87-H+F)A Date: Tue, 12 Feb 2019 22:33:17 +0000 Subject: [PATCH 57/93] Fix the build with ALTQ after r344060. 
--- sys/net/iflib.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sys/net/iflib.c b/sys/net/iflib.c index 1f922b38281..fb0c420f1da 100644 --- a/sys/net/iflib.c +++ b/sys/net/iflib.c @@ -3658,6 +3658,9 @@ _task_fn_tx(void *context) { iflib_txq_t txq = context; if_ctx_t ctx = txq->ift_ctx; +#if defined(ALTQ) || defined(DEV_NETMAP) + if_t ifp = ctx->ifc_ifp; +#endif int abdicate = ctx->ifc_sysctl_tx_abdicate; #ifdef IFLIB_DIAGNOSTICS @@ -3666,11 +3669,11 @@ _task_fn_tx(void *context) if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)) return; #ifdef DEV_NETMAP - if (if_getcapenable(ctx->ifc_ifp) & IFCAP_NETMAP) { + if (if_getcapenable(ifp) & IFCAP_NETMAP) { bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, BUS_DMASYNC_POSTREAD); if (ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, false)) - netmap_tx_irq(ctx->ifc_ifp, txq->ift_id); + netmap_tx_irq(ifp, txq->ift_id); IFDI_TX_QUEUE_INTR_ENABLE(ctx, txq->ift_id); return; } From 0b0c50e1add5e5996197582270056c6aa72bcc08 Mon Sep 17 00:00:00 2001 From: Enji Cooper Date: Tue, 12 Feb 2019 23:33:16 +0000 Subject: [PATCH 58/93] Add rc.resume(8) alias for rc(8) to fix the manpage cross references This issue was noticed when running `make manlint` as part of MFCing r342597 to ^/stable/11: ``` $ make -C share/man/man8 rc.8lint mandoc -Tascii -Tlint rc.8 mandoc: rc.8:548:6: STYLE: referenced manual not found: Xr rc.resume 8 $ ``` This is a followup commit to r339818. 
Reviewed by: eugen Approved by: jtl (mentor) MFC after: 1 week MFC to: ^/stable/12 Differential Revision: https://reviews.freebsd.org/D19158 --- share/man/man8/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/share/man/man8/Makefile b/share/man/man8/Makefile index b8575ef8ac6..a1b6baece08 100644 --- a/share/man/man8/Makefile +++ b/share/man/man8/Makefile @@ -26,6 +26,7 @@ MLINKS= \ rc.8 rc.local.8 \ rc.8 rc.network.8 \ rc.8 rc.pccard.8 \ + rc.8 rc.resume.8 \ rc.8 rc.serial.8 \ rc.8 rc.shutdown.8 From 794022b04ae40453283dce65e79d9d63b75e075e Mon Sep 17 00:00:00 2001 From: Enji Cooper Date: Tue, 12 Feb 2019 23:35:46 +0000 Subject: [PATCH 59/93] Fix up concurrent test zpool setup and teardown Set up zpools with a more unique name, stash the zpool name away in a file pointed to by `$ZPOOL_NAME_FILE` (which is relative to a per-testcase generated temporary directory), then remove the file based on `$ZPOOL_NAME_FILE` in the cleanup routines. This is a more concurrency-safe solution and will allow the testcases to be safely executed in parallel. Reviewed by: kevans, jtl Approved by: jtl (mentor) MFC after: 1 week Differential Revision: https://reviews.freebsd.org/D19024 --- sbin/bectl/tests/bectl_test.sh | 51 ++++++++++++++++++++++------------ 1 file changed, 34 insertions(+), 17 deletions(-) diff --git a/sbin/bectl/tests/bectl_test.sh b/sbin/bectl/tests/bectl_test.sh index cc73673d6eb..02aac6aae00 100755 --- a/sbin/bectl/tests/bectl_test.sh +++ b/sbin/bectl/tests/bectl_test.sh @@ -26,6 +26,17 @@ # # $FreeBSD$ +ZPOOL_NAME_FILE=zpool_name +get_zpool_name() +{ + cat $ZPOOL_NAME_FILE +} +make_zpool_name() +{ + mktemp -u bectl_test_XXXXXX > $ZPOOL_NAME_FILE + get_zpool_name +} + # Establishes a bectl_create zpool that can be used for some light testing; contains # a 'default' BE and not much else. 
bectl_create_setup() @@ -34,6 +45,9 @@ bectl_create_setup() disk=$2 mnt=$3 + # Sanity check to make sure `make_zpool_name` succeeded + atf_check test -n "$zpool" + kldload -n -q zfs || atf_skip "ZFS module not loaded on the current system" atf_check mkdir -p ${mnt} atf_check truncate -s 1G ${disk} @@ -48,6 +62,9 @@ bectl_create_deep_setup() disk=$2 mnt=$3 + # Sanity check to make sure `make_zpool_name` succeeded + atf_check test -n "$zpool" + bectl_create_setup ${zpool} ${disk} ${mnt} atf_check mkdir -p ${root} atf_check -o ignore bectl -r ${zpool}/ROOT mount default ${root} @@ -60,8 +77,9 @@ bectl_create_deep_setup() bectl_cleanup() { zpool=$1 - - if zpool get health ${zpool} >/dev/null 2>&1; then + if [ -z "$zpool" ]; then + echo "Skipping cleanup; zpool not set up" + elif zpool get health ${zpool} >/dev/null 2>&1; then zpool destroy -f ${zpool} fi } @@ -76,7 +94,7 @@ bectl_create_head() bectl_create_body() { cwd=$(realpath .) - zpool=bectl_test + zpool=$(make_zpool_name) disk=${cwd}/disk.img mount=${cwd}/mnt @@ -89,8 +107,7 @@ bectl_create_body() } bectl_create_cleanup() { - - bectl_cleanup bectl_test + bectl_cleanup $(get_zpool_name) } atf_test_case bectl_destroy cleanup @@ -103,7 +120,7 @@ bectl_destroy_head() bectl_destroy_body() { cwd=$(realpath .) - zpool=bectl_test + zpool=$(make_zpool_name) disk=${cwd}/disk.img mount=${cwd}/mnt @@ -116,7 +133,7 @@ bectl_destroy_body() bectl_destroy_cleanup() { - bectl_cleanup bectl_test + bectl_cleanup $(get_zpool_name) } atf_test_case bectl_export_import cleanup @@ -129,7 +146,7 @@ bectl_export_import_head() bectl_export_import_body() { cwd=$(realpath .) - zpool=bectl_test + zpool=$(make_zpool_name) disk=${cwd}/disk.img mount=${cwd}/mnt @@ -144,7 +161,7 @@ bectl_export_import_body() bectl_export_import_cleanup() { - bectl_cleanup bectl_test + bectl_cleanup $(get_zpool_name) } atf_test_case bectl_list cleanup @@ -157,7 +174,7 @@ bectl_list_head() bectl_list_body() { cwd=$(realpath .) 
- zpool=bectl_test + zpool=$(make_zpool_name) disk=${cwd}/disk.img mount=${cwd}/mnt @@ -179,7 +196,7 @@ bectl_list_body() bectl_list_cleanup() { - bectl_cleanup bectl_test + bectl_cleanup $(get_zpool_name) } atf_test_case bectl_mount cleanup @@ -192,7 +209,7 @@ bectl_mount_head() bectl_mount_body() { cwd=$(realpath .) - zpool=bectl_test + zpool=$(make_zpool_name) disk=${cwd}/disk.img mount=${cwd}/mnt root=${mount}/root @@ -213,7 +230,7 @@ bectl_mount_body() bectl_mount_cleanup() { - bectl_cleanup bectl_test + bectl_cleanup $(get_zpool_name) } atf_test_case bectl_rename cleanup @@ -226,7 +243,7 @@ bectl_rename_head() bectl_rename_body() { cwd=$(realpath .) - zpool=bectl_test + zpool=$(make_zpool_name) disk=${cwd}/disk.img mount=${cwd}/mnt @@ -239,7 +256,7 @@ bectl_rename_body() bectl_rename_cleanup() { - bectl_cleanup bectl_test + bectl_cleanup $(get_zpool_name) } atf_test_case bectl_jail cleanup @@ -252,7 +269,7 @@ bectl_jail_head() bectl_jail_body() { cwd=$(realpath .) - zpool=bectl_test + zpool=$(make_zpool_name) disk=${cwd}/disk.img mount=${cwd}/mnt root=${mount}/root @@ -327,7 +344,7 @@ bectl_jail_cleanup() jail -r ${jailid} done; - bectl_cleanup bectl_test + bectl_cleanup $(get_zpool_name) } atf_init_test_cases() From 37e3a57cc1e406d187c5203ffbac30ee18563646 Mon Sep 17 00:00:00 2001 From: Marius Strobl Date: Tue, 12 Feb 2019 23:39:18 +0000 Subject: [PATCH 60/93] With r344062 in place, hwpmc_mod.c generally needs bus_if.h and device_if.h. 
--- sys/modules/hwpmc/Makefile | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sys/modules/hwpmc/Makefile b/sys/modules/hwpmc/Makefile index df5422808c2..2be1a58a255 100644 --- a/sys/modules/hwpmc/Makefile +++ b/sys/modules/hwpmc/Makefile @@ -6,7 +6,8 @@ KMOD= hwpmc -SRCS= hwpmc_mod.c hwpmc_logging.c hwpmc_soft.c vnode_if.h +SRCS= bus_if.h device_if.h hwpmc_mod.c hwpmc_logging.c hwpmc_soft.c +SRCS+= vnode_if.h .if ${MACHINE_CPUARCH} == "aarch64" SRCS+= hwpmc_arm64.c hwpmc_arm64_md.c @@ -15,7 +16,6 @@ SRCS+= hwpmc_arm64.c hwpmc_arm64_md.c .if ${MACHINE_CPUARCH} == "amd64" SRCS+= hwpmc_amd.c hwpmc_core.c hwpmc_intel.c hwpmc_tsc.c SRCS+= hwpmc_x86.c hwpmc_uncore.c -SRCS+= device_if.h bus_if.h .endif .if ${MACHINE_CPUARCH} == "arm" @@ -25,7 +25,6 @@ SRCS+= hwpmc_arm.c .if ${MACHINE_CPUARCH} == "i386" SRCS+= hwpmc_amd.c hwpmc_core.c hwpmc_intel.c SRCS+= hwpmc_tsc.c hwpmc_x86.c hwpmc_uncore.c -SRCS+= device_if.h bus_if.h .endif .if ${MACHINE_CPUARCH} == "powerpc" From a73b2e25e1e2b69fb172fbb391e5bd1c3878e881 Mon Sep 17 00:00:00 2001 From: Warner Losh Date: Wed, 13 Feb 2019 00:10:12 +0000 Subject: [PATCH 61/93] Fix panic message. The panic message led people to believe some userland CAM request had caused a problem when in reality it was for a kernel request (eg the USER bit was cleared). Reword message. Also, improve a couple of comments to reflect that the periph shouldn't be completely torn down before we get here (so the path and sim pointers should be valid, but aren't and the code is designed to be robust enough in the face of that to give a specific panic message).
--- sys/cam/cam_xpt.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sys/cam/cam_xpt.c b/sys/cam/cam_xpt.c index 0679ca9cea4..2217ecc6d2f 100644 --- a/sys/cam/cam_xpt.c +++ b/sys/cam/cam_xpt.c @@ -5411,8 +5411,9 @@ xpt_done_process(struct ccb_hdr *ccb_h) } /* - * Insulate against a race where the periph is destroyed - * but CCBs are still not all processed. + * Insulate against a race where the periph is destroyed but CCBs are + * still not all processed. This shouldn't happen, but allows us better + * bug diagnostic when it does. */ if (ccb_h->path->bus) sim = ccb_h->path->bus->sim; @@ -5434,7 +5435,7 @@ xpt_done_process(struct ccb_hdr *ccb_h) if (sim) devq = sim->devq; - KASSERT(devq, ("sim missing for XPT_FC_USER_CCB request")); + KASSERT(devq, ("Periph disappeared with request pending.")); mtx_lock(&devq->send_mtx); devq->send_active--; From 64143619ab52bac3705de2671e9c789d119312df Mon Sep 17 00:00:00 2001 From: Justin Hibbits Date: Wed, 13 Feb 2019 03:11:12 +0000 Subject: [PATCH 62/93] powerpc/booke: Use the 'tlbilx' instruction on newer cores Newer cores have the 'tlbilx' instruction, which doesn't broadcast over CoreNet. This is significantly faster than walking the TLB to invalidate the PID mappings. tlbilx with the arguments given takes 131 clock cycles to complete, as opposed to 512 iterations through the loop plus tlbre/tlbwe at each iteration. MFC after: 3 weeks --- sys/powerpc/booke/pmap.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/sys/powerpc/booke/pmap.c b/sys/powerpc/booke/pmap.c index 931814497d2..19b9b36b491 100644 --- a/sys/powerpc/booke/pmap.c +++ b/sys/powerpc/booke/pmap.c @@ -4325,6 +4325,21 @@ tid_flush(tlbtid_t tid) msr = mfmsr(); __asm __volatile("wrteei 0"); + /* + * Newer (e500mc and later) have tlbilx, which doesn't broadcast, so use + * it for PID invalidation. 
+ */ + switch ((mfpvr() >> 16) & 0xffff) { + case FSL_E500mc: + case FSL_E5500: + case FSL_E6500: + mtspr(SPR_MAS6, tid << MAS6_SPID0_SHIFT); + /* tlbilxpid */ + __asm __volatile("isync; .long 0x7c000024; isync; msync"); + mtmsr(msr); + return; + } + for (way = 0; way < TLB0_WAYS; way++) for (entry = 0; entry < TLB0_ENTRIES_PER_WAY; entry++) { From be7dd423764c16ee320770f715fea5da105bad13 Mon Sep 17 00:00:00 2001 From: Kyle Evans Date: Wed, 13 Feb 2019 04:19:08 +0000 Subject: [PATCH 63/93] libbe(3): Fix be_destroy behavior w.r.t. deep BE snapshots and -o be_destroy is documented to recursively destroy a boot environment. In the case of snapshots, one would take this to mean that these are also recursively destroyed. However, this was previously not the case. be_destroy would descend into the be_destroy callback and attempt to zfs_iter_children on the top-level snapshot, which is bogus. Our alternative approach is to take note of the snapshot name and iterate through all of fs children of the BE to try destruction in the children. The -o option is also fixed to work properly with deep BEs. If the BE was created with `bectl create -e otherDeepBE newDeepBE`, for instance, then a recursive snapshot of otherDeepBE would have been taken for construction of newDeepBE but a subsequent destroy with BE_DESTROY_ORIGIN set would only clean up the snapshot at the root of otherDeepBE: ${BEROOT}/otherDeepBE@... The most recent iteration instead pretends not to know how these things work, verifies that the origin is another BE and then passes that back through be_destroy to DTRT when snapshots and deep BEs may be in play. 
MFC after: 1 week --- lib/libbe/be.c | 100 ++++++++++++++++++++++++++++++++----------- lib/libbe/be.h | 1 + lib/libbe/be_error.c | 3 ++ lib/libbe/libbe.3 | 4 +- 4 files changed, 83 insertions(+), 25 deletions(-) diff --git a/lib/libbe/be.c b/lib/libbe/be.c index cf13e9fec9b..13c1f66d362 100644 --- a/lib/libbe/be.c +++ b/lib/libbe/be.c @@ -45,6 +45,11 @@ __FBSDID("$FreeBSD$"); #include "be.h" #include "be_impl.h" +struct be_destroy_data { + libbe_handle_t *lbh; + char *snapname; +}; + #if SOON static int be_create_child_noent(libbe_handle_t *lbh, const char *active, const char *child_path); @@ -186,12 +191,38 @@ be_nicenum(uint64_t num, char *buf, size_t buflen) static int be_destroy_cb(zfs_handle_t *zfs_hdl, void *data) { + char path[BE_MAXPATHLEN]; + struct be_destroy_data *bdd; + zfs_handle_t *snap; int err; - if ((err = zfs_iter_children(zfs_hdl, be_destroy_cb, data)) != 0) - return (err); - if ((err = zfs_destroy(zfs_hdl, false)) != 0) + bdd = (struct be_destroy_data *)data; + if (bdd->snapname == NULL) { + err = zfs_iter_children(zfs_hdl, be_destroy_cb, data); + if (err != 0) + return (err); + return (zfs_destroy(zfs_hdl, false)); + } + /* If we're dealing with snapshots instead, delete that one alone */ + err = zfs_iter_filesystems(zfs_hdl, be_destroy_cb, data); + if (err != 0) return (err); + /* + * This part is intentionally glossing over any potential errors, + * because there's a lot less potential for errors when we're cleaning + * up snapshots rather than a full deep BE. The primary error case + * here being if the snapshot doesn't exist in the first place, which + * the caller will likely deem insignificant as long as it doesn't + * exist after the call. Thus, such a missing snapshot shouldn't jam + * up the destruction. 
+ */ + snprintf(path, sizeof(path), "%s@%s", zfs_get_name(zfs_hdl), + bdd->snapname); + if (!zfs_dataset_exists(bdd->lbh->lzh, path, ZFS_TYPE_SNAPSHOT)) + return (0); + snap = zfs_open(bdd->lbh->lzh, path, ZFS_TYPE_SNAPSHOT); + if (snap != NULL) + zfs_destroy(snap, false); return (0); } @@ -199,22 +230,26 @@ be_destroy_cb(zfs_handle_t *zfs_hdl, void *data) * Destroy the boot environment or snapshot specified by the name * parameter. Options are or'd together with the possible values: * BE_DESTROY_FORCE : forces operation on mounted datasets + * BE_DESTROY_ORIGIN: destroy the origin snapshot as well */ int be_destroy(libbe_handle_t *lbh, const char *name, int options) { + struct be_destroy_data bdd; char origin[BE_MAXPATHLEN], path[BE_MAXPATHLEN]; zfs_handle_t *fs; - char *p; + char *snapdelim; int err, force, mounted; + size_t rootlen; - p = path; + bdd.lbh = lbh; + bdd.snapname = NULL; force = options & BE_DESTROY_FORCE; *origin = '\0'; be_root_concat(lbh, name, path); - if (strchr(name, '@') == NULL) { + if ((snapdelim = strchr(path, '@')) == NULL) { if (!zfs_dataset_exists(lbh->lzh, path, ZFS_TYPE_FILESYSTEM)) return (set_error(lbh, BE_ERR_NOENT)); @@ -222,9 +257,10 @@ be_destroy(libbe_handle_t *lbh, const char *name, int options) strcmp(path, lbh->bootfs) == 0) return (set_error(lbh, BE_ERR_DESTROYACT)); - fs = zfs_open(lbh->lzh, p, ZFS_TYPE_FILESYSTEM); + fs = zfs_open(lbh->lzh, path, ZFS_TYPE_FILESYSTEM); if (fs == NULL) return (set_error(lbh, BE_ERR_ZFSOPEN)); + if ((options & BE_DESTROY_ORIGIN) != 0 && zfs_prop_get(fs, ZFS_PROP_ORIGIN, origin, sizeof(origin), NULL, NULL, 0, 1) != 0) @@ -233,41 +269,57 @@ be_destroy(libbe_handle_t *lbh, const char *name, int options) if (!zfs_dataset_exists(lbh->lzh, path, ZFS_TYPE_SNAPSHOT)) return (set_error(lbh, BE_ERR_NOENT)); - fs = zfs_open(lbh->lzh, p, ZFS_TYPE_SNAPSHOT); - if (fs == NULL) + bdd.snapname = strdup(snapdelim + 1); + if (bdd.snapname == NULL) + return (set_error(lbh, BE_ERR_NOMEM)); + *snapdelim = '\0'; 
+ fs = zfs_open(lbh->lzh, path, ZFS_TYPE_DATASET); + if (fs == NULL) { + free(bdd.snapname); return (set_error(lbh, BE_ERR_ZFSOPEN)); + } } /* Check if mounted, unmount if force is specified */ if ((mounted = zfs_is_mounted(fs, NULL)) != 0) { - if (force) + if (force) { zfs_unmount(fs, NULL, 0); - else + } else { + free(bdd.snapname); return (set_error(lbh, BE_ERR_DESTROYMNT)); + } } - if ((err = be_destroy_cb(fs, NULL)) != 0) { + err = be_destroy_cb(fs, &bdd); + zfs_close(fs); + free(bdd.snapname); + if (err != 0) { /* Children are still present or the mount is referenced */ if (err == EBUSY) return (set_error(lbh, BE_ERR_DESTROYMNT)); return (set_error(lbh, BE_ERR_UNKNOWN)); } - if (*origin != '\0') { - fs = zfs_open(lbh->lzh, origin, ZFS_TYPE_SNAPSHOT); - if (fs == NULL) - return (set_error(lbh, BE_ERR_ZFSOPEN)); - err = zfs_destroy(fs, false); - if (err == EBUSY) - return (set_error(lbh, BE_ERR_DESTROYMNT)); - else if (err != 0) - return (set_error(lbh, BE_ERR_UNKNOWN)); - } + if ((options & BE_DESTROY_ORIGIN) == 0) + return (0); - return (0); + /* The origin can't possibly be shorter than the BE root */ + rootlen = strlen(lbh->root); + if (*origin == '\0' || strlen(origin) <= rootlen + 1) + return (set_error(lbh, BE_ERR_INVORIGIN)); + + /* + * We'll be chopping off the BE root and running this back through + * be_destroy, so that we properly handle the origin snapshot whether + * it be that of a deep BE or not. 
+ */ + if (strncmp(origin, lbh->root, rootlen) != 0 || origin[rootlen] != '/') + return (0); + + return (be_destroy(lbh, origin + rootlen + 1, + options & ~BE_DESTROY_ORIGIN)); } - int be_snapshot(libbe_handle_t *lbh, const char *source, const char *snap_name, bool recursive, char *result) diff --git a/lib/libbe/be.h b/lib/libbe/be.h index dcf336d7423..e1aced8d983 100644 --- a/lib/libbe/be.h +++ b/lib/libbe/be.h @@ -59,6 +59,7 @@ typedef enum be_error { BE_ERR_NOPOOL, /* operation not supported on this pool */ BE_ERR_NOMEM, /* insufficient memory */ BE_ERR_UNKNOWN, /* unknown error */ + BE_ERR_INVORIGIN, /* invalid origin */ } be_error_t; diff --git a/lib/libbe/be_error.c b/lib/libbe/be_error.c index 746d873f8a3..04e062936a9 100644 --- a/lib/libbe/be_error.c +++ b/lib/libbe/be_error.c @@ -105,6 +105,9 @@ libbe_error_description(libbe_handle_t *lbh) case BE_ERR_UNKNOWN: return ("unknown error"); + case BE_ERR_INVORIGIN: + return ("invalid origin"); + default: assert(lbh->error == BE_ERR_SUCCESS); return ("no error"); diff --git a/lib/libbe/libbe.3 b/lib/libbe/libbe.3 index ada024374eb..750a6ec261e 100644 --- a/lib/libbe/libbe.3 +++ b/lib/libbe/libbe.3 @@ -28,7 +28,7 @@ .\" .\" $FreeBSD$ .\" -.Dd February 11, 2019 +.Dd February 12, 2019 .Dt LIBBE 3 .Os .Sh NAME @@ -489,6 +489,8 @@ BE_ERR_NOPOOL BE_ERR_NOMEM .It BE_ERR_UNKNOWN +.It +BE_ERR_INVORIGIN .El .Sh SEE ALSO .Xr bectl 8 From 60cc4a3e2db22ccab371a8ede6298b6ea117c3f5 Mon Sep 17 00:00:00 2001 From: "David E. O'Brien" Date: Wed, 13 Feb 2019 04:52:01 +0000 Subject: [PATCH 64/93] Note that readpassphrase() came into FreeBSD's libc at 4.6. --- lib/libc/gen/readpassphrase.3 | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/libc/gen/readpassphrase.3 b/lib/libc/gen/readpassphrase.3 index 3bcee6f5638..17463926399 100644 --- a/lib/libc/gen/readpassphrase.3 +++ b/lib/libc/gen/readpassphrase.3 @@ -178,4 +178,6 @@ extension and should not be used if portability is desired. 
The .Fn readpassphrase function first appeared in +.Fx 4.6 +and .Ox 2.9 . From 64339c4130f344f01f8f36cbc3fda93f96c69d75 Mon Sep 17 00:00:00 2001 From: Martin Matuska Date: Wed, 13 Feb 2019 07:35:18 +0000 Subject: [PATCH 65/93] Update vendor/libarchive/dist to git 3532bc32819b14bfd8a3a5e3d3554ce14d939940 archive_read_disk_posix.c: initialize delayed_errno --- libarchive/archive_read_disk_posix.c | 1 + 1 file changed, 1 insertion(+) diff --git a/libarchive/archive_read_disk_posix.c b/libarchive/archive_read_disk_posix.c index 09c366f5feb..31d2429c41a 100644 --- a/libarchive/archive_read_disk_posix.c +++ b/libarchive/archive_read_disk_posix.c @@ -860,6 +860,7 @@ next_entry(struct archive_read_disk *a, struct tree *t, struct archive_string delayed_str; delayed = ARCHIVE_OK; + delayed_errno = 0; archive_string_init(&delayed_str); st = NULL; From fa91f845028358800c1a848b68fff7c7822ac6fb Mon Sep 17 00:00:00 2001 From: Randall Stewart Date: Wed, 13 Feb 2019 14:57:59 +0000 Subject: [PATCH 66/93] This commit adds the missing release mechanism for the ratelimiting code. The two modules (lagg and vlan) did have allocation routines, and even though they are indirect (and vector down to the underlying interfaces) they both need to have a free routine (that also vectors down to the actual interface). Sponsored by: Netflix Inc. 
Differential Revision: https://reviews.freebsd.org/D19032 --- sys/net/if_lagg.c | 9 +++++++++ sys/net/if_vlan.c | 8 ++++++++ 2 files changed, 17 insertions(+) diff --git a/sys/net/if_lagg.c b/sys/net/if_lagg.c index 3bea2e95d69..5b7c6eb0aca 100644 --- a/sys/net/if_lagg.c +++ b/sys/net/if_lagg.c @@ -133,6 +133,7 @@ static int lagg_ioctl(struct ifnet *, u_long, caddr_t); static int lagg_snd_tag_alloc(struct ifnet *, union if_snd_tag_alloc_params *, struct m_snd_tag **); +static void lagg_snd_tag_free(struct m_snd_tag *); #endif static int lagg_setmulti(struct lagg_port *); static int lagg_clrmulti(struct lagg_port *); @@ -514,6 +515,7 @@ lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params) ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST; #ifdef RATELIMIT ifp->if_snd_tag_alloc = lagg_snd_tag_alloc; + ifp->if_snd_tag_free = lagg_snd_tag_free; #endif ifp->if_capenable = ifp->if_capabilities = IFCAP_HWSTATS; @@ -1568,6 +1570,13 @@ lagg_snd_tag_alloc(struct ifnet *ifp, /* forward allocation request */ return (ifp->if_snd_tag_alloc(ifp, params, ppmt)); } + +static void +lagg_snd_tag_free(struct m_snd_tag *tag) +{ + tag->ifp->if_snd_tag_free(tag); +} + #endif static int diff --git a/sys/net/if_vlan.c b/sys/net/if_vlan.c index 535ce8cf7dc..cf8b4ff1790 100644 --- a/sys/net/if_vlan.c +++ b/sys/net/if_vlan.c @@ -267,6 +267,7 @@ static int vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr); #ifdef RATELIMIT static int vlan_snd_tag_alloc(struct ifnet *, union if_snd_tag_alloc_params *, struct m_snd_tag **); +static void vlan_snd_tag_free(struct m_snd_tag *); #endif static void vlan_qflush(struct ifnet *ifp); static int vlan_setflag(struct ifnet *ifp, int flag, int status, @@ -1047,6 +1048,7 @@ vlan_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) ifp->if_ioctl = vlan_ioctl; #ifdef RATELIMIT ifp->if_snd_tag_alloc = vlan_snd_tag_alloc; + ifp->if_snd_tag_free = vlan_snd_tag_free; #endif ifp->if_flags = VLAN_IFFLAGS; 
ether_ifattach(ifp, eaddr); @@ -1934,4 +1936,10 @@ vlan_snd_tag_alloc(struct ifnet *ifp, /* forward allocation request */ return (ifp->if_snd_tag_alloc(ifp, params, ppmt)); } + +static void +vlan_snd_tag_free(struct m_snd_tag *tag) +{ + tag->ifp->if_snd_tag_free(tag); +} #endif From c7ee62fcd54734db427f0d1e9f20caf2d201b6ec Mon Sep 17 00:00:00 2001 From: "Andrey V. Elsukov" Date: Wed, 13 Feb 2019 15:46:05 +0000 Subject: [PATCH 67/93] In r335015 PCB destroying was made deferred using epoch_call(). But ipsec_delete_pcbpolicy() uses some VNET-virtualized variables, and thus it needs VNET context, that is missing during gtaskqueue executing. Use inp_vnet context to set curvnet in in_pcbfree_deferred(). PR: 235684 MFC after: 1 week --- sys/netinet/in_pcb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c index b13e1e36518..41b3c812d97 100644 --- a/sys/netinet/in_pcb.c +++ b/sys/netinet/in_pcb.c @@ -1565,6 +1565,7 @@ in_pcbfree_deferred(epoch_context_t ctx) inp = __containerof(ctx, struct inpcb, inp_epoch_ctx); INP_WLOCK(inp); + CURVNET_SET(inp->inp_vnet); #ifdef INET struct ip_moptions *imo = inp->inp_moptions; inp->inp_moptions = NULL; @@ -1597,6 +1598,7 @@ in_pcbfree_deferred(epoch_context_t ctx) #ifdef INET inp_freemoptions(imo); #endif + CURVNET_RESTORE(); } /* From f6893f09d515a7db685556a61e76074021e4d6d4 Mon Sep 17 00:00:00 2001 From: Mark Johnston Date: Wed, 13 Feb 2019 17:19:37 +0000 Subject: [PATCH 68/93] Implement transparent 2MB superpage promotion for RISC-V. This includes support for pmap_enter(..., psind=1) as described in the commit log message for r321378. The changes are largely modelled after amd64. arm64 has more stringent requirements around superpage creation to avoid the possibility of TLB conflict aborts, and these requirements do not apply to RISC-V, which like amd64 permits simultaneous caching of 4KB and 2MB translations for a given page.
RISC-V's PTE format includes only two software bits, and as these are already consumed we do not have an analogue for amd64's PG_PROMOTED. Instead, pmap_remove_l2() always invalidates the entire 2MB address range. pmap_ts_referenced() is modified to clear PTE_A, now that we support both hardware- and software-managed reference and dirty bits. Also fix pmap_fault_fixup() so that it does not set PTE_A or PTE_D on kernel mappings. Reviewed by: kib (earlier version) Discussed with: jhb Sponsored by: The FreeBSD Foundation Differential Revision: https://reviews.freebsd.org/D18863 Differential Revision: https://reviews.freebsd.org/D18864 Differential Revision: https://reviews.freebsd.org/D18865 Differential Revision: https://reviews.freebsd.org/D18866 Differential Revision: https://reviews.freebsd.org/D18867 Differential Revision: https://reviews.freebsd.org/D18868 --- sys/riscv/include/param.h | 2 +- sys/riscv/include/pmap.h | 4 + sys/riscv/include/pte.h | 5 +- sys/riscv/include/vmparam.h | 4 +- sys/riscv/riscv/pmap.c | 1469 +++++++++++++++++++++++++++++------ sys/vm/vm_fault.c | 8 +- 6 files changed, 1247 insertions(+), 245 deletions(-) diff --git a/sys/riscv/include/param.h b/sys/riscv/include/param.h index f22e747ffdc..4b4610bea8d 100644 --- a/sys/riscv/include/param.h +++ b/sys/riscv/include/param.h @@ -82,7 +82,7 @@ #define PAGE_SIZE (1 << PAGE_SHIFT) /* Page size */ #define PAGE_MASK (PAGE_SIZE - 1) -#define MAXPAGESIZES 1 /* maximum number of supported page sizes */ +#define MAXPAGESIZES 3 /* maximum number of supported page sizes */ #ifndef KSTACK_PAGES #define KSTACK_PAGES 4 /* pages of kernel stack (with pcb) */ diff --git a/sys/riscv/include/pmap.h b/sys/riscv/include/pmap.h index 79c5dc2a3da..92eeee26331 100644 --- a/sys/riscv/include/pmap.h +++ b/sys/riscv/include/pmap.h @@ -44,6 +44,8 @@ #include #include +#include + #ifdef _KERNEL #define vtophys(va) pmap_kextract((vm_offset_t)(va)) @@ -80,6 +82,7 @@ struct pmap { pd_entry_t *pm_l1; TAILQ_HEAD(,pv_chunk) 
pm_pvchunk; /* list of mappings in pmap */ LIST_ENTRY(pmap) pm_list; /* List of all pmaps */ + struct vm_radix pm_root; }; typedef struct pv_entry { @@ -139,6 +142,7 @@ void pmap_kenter_device(vm_offset_t, vm_size_t, vm_paddr_t); vm_paddr_t pmap_kextract(vm_offset_t va); void pmap_kremove(vm_offset_t); void pmap_kremove_device(vm_offset_t, vm_size_t); +bool pmap_ps_enabled(pmap_t); void *pmap_mapdev(vm_offset_t, vm_size_t); void *pmap_mapbios(vm_paddr_t, vm_size_t); diff --git a/sys/riscv/include/pte.h b/sys/riscv/include/pte.h index 723c9cf26ff..a88566d890a 100644 --- a/sys/riscv/include/pte.h +++ b/sys/riscv/include/pte.h @@ -62,7 +62,8 @@ typedef uint64_t pn_t; /* page number */ #define L3_SIZE (1 << L3_SHIFT) #define L3_OFFSET (L3_SIZE - 1) -#define Ln_ENTRIES (1 << 9) +#define Ln_ENTRIES_SHIFT 9 +#define Ln_ENTRIES (1 << Ln_ENTRIES_SHIFT) #define Ln_ADDR_MASK (Ln_ENTRIES - 1) /* Bits 9:8 are reserved for software */ @@ -79,6 +80,8 @@ typedef uint64_t pn_t; /* page number */ #define PTE_RWX (PTE_R | PTE_W | PTE_X) #define PTE_RX (PTE_R | PTE_X) #define PTE_KERN (PTE_V | PTE_R | PTE_W | PTE_A | PTE_D) +#define PTE_PROMOTE (PTE_V | PTE_RWX | PTE_D | PTE_A | PTE_G | PTE_U | \ + PTE_SW_MANAGED | PTE_SW_WIRED) #define PTE_PPN0_S 10 #define PTE_PPN1_S 19 diff --git a/sys/riscv/include/vmparam.h b/sys/riscv/include/vmparam.h index 49c720e681c..ee03f7b09cc 100644 --- a/sys/riscv/include/vmparam.h +++ b/sys/riscv/include/vmparam.h @@ -99,10 +99,10 @@ #define VM_NFREEORDER 12 /* - * Disable superpage reservations. + * Enable superpage reservations: 1 level. 
*/ #ifndef VM_NRESERVLEVEL -#define VM_NRESERVLEVEL 0 +#define VM_NRESERVLEVEL 1 #endif /* diff --git a/sys/riscv/riscv/pmap.c b/sys/riscv/riscv/pmap.c index 2e9c3a12579..3929fe1b3e5 100644 --- a/sys/riscv/riscv/pmap.c +++ b/sys/riscv/riscv/pmap.c @@ -118,6 +118,7 @@ __FBSDID("$FreeBSD$"); */ #include +#include #include #include #include @@ -145,6 +146,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -154,9 +156,8 @@ __FBSDID("$FreeBSD$"); #include #include -#define NPDEPG (PAGE_SIZE/(sizeof (pd_entry_t))) -#define NUPDE (NPDEPG * NPDEPG) -#define NUSERPGTBLS (NUPDE + NPDEPG) +#define NUL1E (Ln_ENTRIES * Ln_ENTRIES) +#define NUL2E (Ln_ENTRIES * NUL1E) #if !defined(DIAGNOSTIC) #ifdef __GNUC_GNU_INLINE__ @@ -175,11 +176,12 @@ __FBSDID("$FreeBSD$"); #endif #define pmap_l2_pindex(v) ((v) >> L2_SHIFT) +#define pa_to_pvh(pa) (&pv_table[pa_index(pa)]) #define NPV_LIST_LOCKS MAXCPU #define PHYS_TO_PV_LIST_LOCK(pa) \ - (&pv_list_locks[pa_index(pa) % NPV_LIST_LOCKS]) + (&pv_list_locks[pmap_l2_pindex(pa) % NPV_LIST_LOCKS]) #define CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa) do { \ struct rwlock **_lockp = (lockp); \ @@ -230,12 +232,51 @@ CTASSERT((DMAP_MAX_ADDRESS & ~L1_OFFSET) == DMAP_MAX_ADDRESS); static struct rwlock_padalign pvh_global_lock; static struct mtx_padalign allpmaps_lock; +static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, + "VM/pmap parameters"); + +static int superpages_enabled = 1; +SYSCTL_INT(_vm_pmap, OID_AUTO, superpages_enabled, + CTLFLAG_RDTUN, &superpages_enabled, 0, + "Enable support for transparent superpages"); + +static SYSCTL_NODE(_vm_pmap, OID_AUTO, l2, CTLFLAG_RD, 0, + "2MB page mapping counters"); + +static u_long pmap_l2_demotions; +SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, demotions, CTLFLAG_RD, + &pmap_l2_demotions, 0, + "2MB page demotions"); + +static u_long pmap_l2_mappings; +SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, mappings, CTLFLAG_RD, + &pmap_l2_mappings, 0, + "2MB page mappings"); + +static u_long 
pmap_l2_p_failures; +SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, p_failures, CTLFLAG_RD, + &pmap_l2_p_failures, 0, + "2MB page promotion failures"); + +static u_long pmap_l2_promotions; +SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, promotions, CTLFLAG_RD, + &pmap_l2_promotions, 0, + "2MB page promotions"); + /* * Data for the pv entry allocation mechanism */ static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks); static struct mtx pv_chunks_mutex; static struct rwlock pv_list_locks[NPV_LIST_LOCKS]; +static struct md_page *pv_table; +static struct md_page pv_dummy; + +/* + * Internal flags for pmap_enter()'s helper functions. + */ +#define PMAP_ENTER_NORECLAIM 0x1000000 /* Don't reclaim PV entries. */ +#define PMAP_ENTER_NOREPLACE 0x2000000 /* Don't replace mappings. */ static void free_pv_chunk(struct pv_chunk *pc); static void free_pv_entry(pmap_t pmap, pv_entry_t pv); @@ -244,6 +285,11 @@ static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp); static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va); static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va); +static bool pmap_demote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va); +static bool pmap_demote_l2_locked(pmap_t pmap, pd_entry_t *l2, + vm_offset_t va, struct rwlock **lockp); +static int pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t new_l2, + u_int flags, vm_page_t m, struct rwlock **lockp); static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp); static int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t sva, @@ -254,9 +300,9 @@ static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, static vm_page_t _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp); -static void _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, +static void _pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m, 
struct spglist *free); -static int pmap_unuse_l3(pmap_t, vm_offset_t, pd_entry_t, struct spglist *); +static int pmap_unuse_pt(pmap_t, vm_offset_t, pd_entry_t, struct spglist *); #define pmap_clear(pte) pmap_store(pte, 0) #define pmap_clear_bits(pte, bits) atomic_clear_64(pte, bits) @@ -636,7 +682,8 @@ pmap_page_init(vm_page_t m) void pmap_init(void) { - int i; + vm_size_t s; + int i, pv_npg; /* * Initialize the pv chunk and pmap list mutexes. @@ -649,6 +696,24 @@ pmap_init(void) */ for (i = 0; i < NPV_LIST_LOCKS; i++) rw_init(&pv_list_locks[i], "pmap pv list"); + + /* + * Calculate the size of the pv head table for superpages. + */ + pv_npg = howmany(vm_phys_segs[vm_phys_nsegs - 1].end, L2_SIZE); + + /* + * Allocate memory for the pv head table for superpages. + */ + s = (vm_size_t)(pv_npg * sizeof(struct md_page)); + s = round_page(s); + pv_table = (struct md_page *)kmem_malloc(s, M_WAITOK | M_ZERO); + for (i = 0; i < pv_npg; i++) + TAILQ_INIT(&pv_table[i].pv_list); + TAILQ_INIT(&pv_dummy.pv_list); + + if (superpages_enabled) + pagesizes[1] = L2_SIZE; } #ifdef SMP @@ -999,6 +1064,13 @@ pmap_qremove(vm_offset_t sva, int count) pmap_invalidate_range(kernel_pmap, sva, va); } +bool +pmap_ps_enabled(pmap_t pmap __unused) +{ + + return (superpages_enabled); +} + /*************************************************** * Page table page management routines..... ***************************************************/ @@ -1018,6 +1090,34 @@ pmap_add_delayed_free_list(vm_page_t m, struct spglist *free, m->flags &= ~PG_ZERO; SLIST_INSERT_HEAD(free, m, plinks.s.ss); } + +/* + * Inserts the specified page table page into the specified pmap's collection + * of idle page table pages. Each of a pmap's page table pages is responsible + * for mapping a distinct range of virtual addresses. The pmap's collection is + * ordered by this virtual address range. 
+ */ +static __inline int +pmap_insert_pt_page(pmap_t pmap, vm_page_t ml3) +{ + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + return (vm_radix_insert(&pmap->pm_root, ml3)); +} + +/* + * Removes the page table page mapping the specified virtual address from the + * specified pmap's collection of idle page table pages, and returns it. + * Otherwise, returns NULL if there is no page table page corresponding to the + * specified virtual address. + */ +static __inline vm_page_t +pmap_remove_pt_page(pmap_t pmap, vm_offset_t va) +{ + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + return (vm_radix_remove(&pmap->pm_root, pmap_l2_pindex(va))); +} /* * Decrements a page table page's wire count, which is used to record the @@ -1026,12 +1126,12 @@ pmap_add_delayed_free_list(vm_page_t m, struct spglist *free, * page table page was unmapped and FALSE otherwise. */ static inline boolean_t -pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) +pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) { --m->wire_count; if (m->wire_count == 0) { - _pmap_unwire_l3(pmap, va, m, free); + _pmap_unwire_ptp(pmap, va, m, free); return (TRUE); } else { return (FALSE); @@ -1039,36 +1139,30 @@ pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) } static void -_pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) +_pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) { vm_paddr_t phys; PMAP_LOCK_ASSERT(pmap, MA_OWNED); - /* - * unmap the page table page - */ - if (m->pindex >= NUPDE) { - /* PD page */ + if (m->pindex >= NUL1E) { pd_entry_t *l1; l1 = pmap_l1(pmap, va); pmap_clear(l1); pmap_distribute_l1(pmap, pmap_l1_index(va), 0); } else { - /* PTE page */ pd_entry_t *l2; l2 = pmap_l2(pmap, va); pmap_clear(l2); } pmap_resident_count_dec(pmap, 1); - if (m->pindex < NUPDE) { + if (m->pindex < NUL1E) { pd_entry_t *l1; - /* We just released a PT, unhold the matching PD */ vm_page_t pdpg; 
l1 = pmap_l1(pmap, va); phys = PTE_TO_PHYS(pmap_load(l1)); pdpg = PHYS_TO_VM_PAGE(phys); - pmap_unwire_l3(pmap, va, pdpg, free); + pmap_unwire_ptp(pmap, va, pdpg, free); } pmap_invalidate_page(pmap, va); @@ -1082,24 +1176,20 @@ _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) } /* - * After removing an l3 entry, this routine is used to + * After removing a page table entry, this routine is used to * conditionally free the page, and manage the hold/wire counts. */ static int -pmap_unuse_l3(pmap_t pmap, vm_offset_t va, pd_entry_t ptepde, +pmap_unuse_pt(pmap_t pmap, vm_offset_t va, pd_entry_t ptepde, struct spglist *free) { - vm_paddr_t phys; vm_page_t mpte; if (va >= VM_MAXUSER_ADDRESS) return (0); KASSERT(ptepde != 0, ("pmap_unuse_pt: ptepde != 0")); - - phys = PTE_TO_PHYS(ptepde); - - mpte = PHYS_TO_VM_PAGE(phys); - return (pmap_unwire_l3(pmap, va, mpte, free)); + mpte = PHYS_TO_VM_PAGE(PTE_TO_PHYS(ptepde)); + return (pmap_unwire_ptp(pmap, va, mpte, free)); } void @@ -1140,6 +1230,8 @@ pmap_pinit(pmap_t pmap) LIST_INSERT_HEAD(&allpmaps, pmap, pm_list); mtx_unlock(&allpmaps_lock); + vm_radix_init(&pmap->pm_root); + return (1); } @@ -1193,11 +1285,11 @@ _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp) * it isn't already there. 
*/ - if (ptepindex >= NUPDE) { + if (ptepindex >= NUL1E) { pd_entry_t *l1; vm_pindex_t l1index; - l1index = ptepindex - NUPDE; + l1index = ptepindex - NUL1E; l1 = &pmap->pm_l1[l1index]; pn = (VM_PAGE_TO_PHYS(m) / PAGE_SIZE); @@ -1213,7 +1305,7 @@ _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp) l1 = &pmap->pm_l1[l1index]; if (pmap_load(l1) == 0) { /* recurse for allocating page dir */ - if (_pmap_alloc_l3(pmap, NUPDE + l1index, + if (_pmap_alloc_l3(pmap, NUL1E + l1index, lockp) == NULL) { vm_page_unwire_noq(m); vm_page_free_zero(m); @@ -1240,6 +1332,29 @@ _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp) return (m); } +static vm_page_t +pmap_alloc_l2(pmap_t pmap, vm_offset_t va, struct rwlock **lockp) +{ + pd_entry_t *l1; + vm_page_t l2pg; + vm_pindex_t l2pindex; + +retry: + l1 = pmap_l1(pmap, va); + if (l1 != NULL && (pmap_load(l1) & PTE_RWX) == 0) { + /* Add a reference to the L2 page. */ + l2pg = PHYS_TO_VM_PAGE(PTE_TO_PHYS(pmap_load(l1))); + l2pg->wire_count++; + } else { + /* Allocate a L2 page. */ + l2pindex = pmap_l2_pindex(va) >> Ln_ENTRIES_SHIFT; + l2pg = _pmap_alloc_l3(pmap, NUL2E + l2pindex, lockp); + if (l2pg == NULL && lockp != NULL) + goto retry; + } + return (l2pg); +} + static vm_page_t pmap_alloc_l3(pmap_t pmap, vm_offset_t va, struct rwlock **lockp) { @@ -1598,6 +1713,79 @@ retry: return (pv); } +/* + * Ensure that the number of spare PV entries in the specified pmap meets or + * exceeds the given count, "needed". + * + * The given PV list lock may be released. 
+ */ +static void +reserve_pv_entries(pmap_t pmap, int needed, struct rwlock **lockp) +{ + struct pch new_tail; + struct pv_chunk *pc; + vm_page_t m; + int avail, free; + bool reclaimed; + + rw_assert(&pvh_global_lock, RA_LOCKED); + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + KASSERT(lockp != NULL, ("reserve_pv_entries: lockp is NULL")); + + /* + * Newly allocated PV chunks must be stored in a private list until + * the required number of PV chunks have been allocated. Otherwise, + * reclaim_pv_chunk() could recycle one of these chunks. In + * contrast, these chunks must be added to the pmap upon allocation. + */ + TAILQ_INIT(&new_tail); +retry: + avail = 0; + TAILQ_FOREACH(pc, &pmap->pm_pvchunk, pc_list) { + bit_count((bitstr_t *)pc->pc_map, 0, + sizeof(pc->pc_map) * NBBY, &free); + if (free == 0) + break; + avail += free; + if (avail >= needed) + break; + } + for (reclaimed = false; avail < needed; avail += _NPCPV) { + m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | + VM_ALLOC_WIRED); + if (m == NULL) { + m = reclaim_pv_chunk(pmap, lockp); + if (m == NULL) + goto retry; + reclaimed = true; + } + /* XXX PV STATS */ +#if 0 + dump_add_page(m->phys_addr); +#endif + pc = (void *)PHYS_TO_DMAP(m->phys_addr); + pc->pc_pmap = pmap; + pc->pc_map[0] = PC_FREE0; + pc->pc_map[1] = PC_FREE1; + pc->pc_map[2] = PC_FREE2; + TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); + TAILQ_INSERT_TAIL(&new_tail, pc, pc_lru); + + /* + * The reclaim might have freed a chunk from the current pmap. + * If that chunk contained available entries, we need to + * re-count the number of available entries. + */ + if (reclaimed) + goto retry; + } + if (!TAILQ_EMPTY(&new_tail)) { + mtx_lock(&pv_chunks_mutex); + TAILQ_CONCAT(&pv_chunks, &new_tail, pc_lru); + mtx_unlock(&pv_chunks_mutex); + } +} + /* * First find and then remove the pv entry for the specified pmap and virtual * address from the specified pv list. 
Returns the pv entry if found and NULL @@ -1632,7 +1820,7 @@ pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va) pv = pmap_pvh_remove(pvh, pmap, va); - KASSERT(pv != NULL, ("pmap_pvh_free: pv not found")); + KASSERT(pv != NULL, ("pmap_pvh_free: pv not found for %#lx", va)); free_pv_entry(pmap, pv); } @@ -1659,6 +1847,222 @@ pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m, return (FALSE); } +/* + * After demotion from a 2MB page mapping to 512 4KB page mappings, + * destroy the pv entry for the 2MB page mapping and reinstantiate the pv + * entries for each of the 4KB page mappings. + */ +static void __unused +pmap_pv_demote_l2(pmap_t pmap, vm_offset_t va, vm_paddr_t pa, + struct rwlock **lockp) +{ + struct md_page *pvh; + struct pv_chunk *pc; + pv_entry_t pv; + vm_page_t m; + vm_offset_t va_last; + int bit, field; + + rw_assert(&pvh_global_lock, RA_LOCKED); + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa); + + /* + * Transfer the 2mpage's pv entry for this mapping to the first + * page's pv list. Once this transfer begins, the pv list lock + * must not be released until the last pv entry is reinstantiated. + */ + pvh = pa_to_pvh(pa); + va &= ~L2_OFFSET; + pv = pmap_pvh_remove(pvh, pmap, va); + KASSERT(pv != NULL, ("pmap_pv_demote_l2: pv not found")); + m = PHYS_TO_VM_PAGE(pa); + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); + m->md.pv_gen++; + /* Instantiate the remaining 511 pv entries. 
*/ + va_last = va + L2_SIZE - PAGE_SIZE; + for (;;) { + pc = TAILQ_FIRST(&pmap->pm_pvchunk); + KASSERT(pc->pc_map[0] != 0 || pc->pc_map[1] != 0 || + pc->pc_map[2] != 0, ("pmap_pv_demote_l2: missing spare")); + for (field = 0; field < _NPCM; field++) { + while (pc->pc_map[field] != 0) { + bit = ffsl(pc->pc_map[field]) - 1; + pc->pc_map[field] &= ~(1ul << bit); + pv = &pc->pc_pventry[field * 64 + bit]; + va += PAGE_SIZE; + pv->pv_va = va; + m++; + KASSERT((m->oflags & VPO_UNMANAGED) == 0, + ("pmap_pv_demote_l2: page %p is not managed", m)); + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); + m->md.pv_gen++; + if (va == va_last) + goto out; + } + } + TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); + TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); + } +out: + if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 && pc->pc_map[2] == 0) { + TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); + TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); + } + /* XXX PV stats */ +} + +#if VM_NRESERVLEVEL > 0 +static void +pmap_pv_promote_l2(pmap_t pmap, vm_offset_t va, vm_paddr_t pa, + struct rwlock **lockp) +{ + struct md_page *pvh; + pv_entry_t pv; + vm_page_t m; + vm_offset_t va_last; + + rw_assert(&pvh_global_lock, RA_LOCKED); + KASSERT((va & L2_OFFSET) == 0, + ("pmap_pv_promote_l2: misaligned va %#lx", va)); + + CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa); + + m = PHYS_TO_VM_PAGE(pa); + pv = pmap_pvh_remove(&m->md, pmap, va); + KASSERT(pv != NULL, ("pmap_pv_promote_l2: pv for %#lx not found", va)); + pvh = pa_to_pvh(pa); + TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); + pvh->pv_gen++; + + va_last = va + L2_SIZE - PAGE_SIZE; + do { + m++; + va += PAGE_SIZE; + pmap_pvh_free(&m->md, pmap, va); + } while (va < va_last); +} +#endif /* VM_NRESERVLEVEL > 0 */ + +/* + * Create the PV entry for a 2MB page mapping. Always returns true unless the + * flag PMAP_ENTER_NORECLAIM is specified. If that flag is specified, returns + * false if the PV entry cannot be allocated without resorting to reclamation. 
+ */ +static bool +pmap_pv_insert_l2(pmap_t pmap, vm_offset_t va, pd_entry_t l2e, u_int flags, + struct rwlock **lockp) +{ + struct md_page *pvh; + pv_entry_t pv; + vm_paddr_t pa; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + /* Pass NULL instead of the lock pointer to disable reclamation. */ + if ((pv = get_pv_entry(pmap, (flags & PMAP_ENTER_NORECLAIM) != 0 ? + NULL : lockp)) == NULL) + return (false); + pv->pv_va = va; + pa = PTE_TO_PHYS(l2e); + CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa); + pvh = pa_to_pvh(pa); + TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); + pvh->pv_gen++; + return (true); +} + +static void +pmap_remove_kernel_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va) +{ + pt_entry_t newl2, oldl2; + vm_page_t ml3; + vm_paddr_t ml3pa; + + KASSERT(!VIRT_IN_DMAP(va), ("removing direct mapping of %#lx", va)); + KASSERT(pmap == kernel_pmap, ("pmap %p is not kernel_pmap", pmap)); + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + + ml3 = pmap_remove_pt_page(pmap, va); + if (ml3 == NULL) + panic("pmap_remove_kernel_l2: Missing pt page"); + + ml3pa = VM_PAGE_TO_PHYS(ml3); + newl2 = ml3pa | PTE_V; + + /* + * Initialize the page table page. + */ + pagezero((void *)PHYS_TO_DMAP(ml3pa)); + + /* + * Demote the mapping. + */ + oldl2 = pmap_load_store(l2, newl2); + KASSERT(oldl2 == 0, ("%s: found existing mapping at %p: %#lx", + __func__, l2, oldl2)); +} + +/* + * pmap_remove_l2: Do the things to unmap a level 2 superpage. 
+ */ +static int +pmap_remove_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t sva, + pd_entry_t l1e, struct spglist *free, struct rwlock **lockp) +{ + struct md_page *pvh; + pt_entry_t oldl2; + vm_offset_t eva, va; + vm_page_t m, ml3; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + KASSERT((sva & L2_OFFSET) == 0, ("pmap_remove_l2: sva is not aligned")); + oldl2 = pmap_load_clear(l2); + KASSERT((oldl2 & PTE_RWX) != 0, + ("pmap_remove_l2: L2e %lx is not a superpage mapping", oldl2)); + + /* + * The sfence.vma documentation states that it is sufficient to specify + * a single address within a superpage mapping. However, since we do + * not perform any invalidation upon promotion, TLBs may still be + * caching 4KB mappings within the superpage, so we must invalidate the + * entire range. + */ + pmap_invalidate_range(pmap, sva, sva + L2_SIZE); + if ((oldl2 & PTE_SW_WIRED) != 0) + pmap->pm_stats.wired_count -= L2_SIZE / PAGE_SIZE; + pmap_resident_count_dec(pmap, L2_SIZE / PAGE_SIZE); + if ((oldl2 & PTE_SW_MANAGED) != 0) { + CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, PTE_TO_PHYS(oldl2)); + pvh = pa_to_pvh(PTE_TO_PHYS(oldl2)); + pmap_pvh_free(pvh, pmap, sva); + eva = sva + L2_SIZE; + for (va = sva, m = PHYS_TO_VM_PAGE(PTE_TO_PHYS(oldl2)); + va < eva; va += PAGE_SIZE, m++) { + if ((oldl2 & PTE_D) != 0) + vm_page_dirty(m); + if ((oldl2 & PTE_A) != 0) + vm_page_aflag_set(m, PGA_REFERENCED); + if (TAILQ_EMPTY(&m->md.pv_list) && + TAILQ_EMPTY(&pvh->pv_list)) + vm_page_aflag_clear(m, PGA_WRITEABLE); + } + } + if (pmap == kernel_pmap) { + pmap_remove_kernel_l2(pmap, l2, sva); + } else { + ml3 = pmap_remove_pt_page(pmap, sva); + if (ml3 != NULL) { + pmap_resident_count_dec(pmap, 1); + KASSERT(ml3->wire_count == Ln_ENTRIES, + ("pmap_remove_l2: l3 page wire count error")); + ml3->wire_count = 1; + vm_page_unwire_noq(ml3); + pmap_add_delayed_free_list(ml3, free, FALSE); + } + } + return (pmap_unuse_pt(pmap, sva, l1e, free)); +} + /* * pmap_remove_l3: do the things to unmap a page in a process */ @@ 
-1687,7 +2091,7 @@ pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t va, pmap_pvh_free(&m->md, pmap, va); } - return (pmap_unuse_l3(pmap, va, l2e, free)); + return (pmap_unuse_pt(pmap, va, l2e, free)); } /* @@ -1699,11 +2103,11 @@ pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t va, void pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { + struct spglist free; struct rwlock *lock; vm_offset_t va, va_next; - pd_entry_t *l1, *l2; - pt_entry_t l3_pte, *l3; - struct spglist free; + pd_entry_t *l1, *l2, l2e; + pt_entry_t *l3; /* * Perform an unsynchronized read. This is, however, safe. @@ -1739,16 +2143,22 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) l2 = pmap_l1_to_l2(l1, sva); if (l2 == NULL) continue; - - l3_pte = pmap_load(l2); - - /* - * Weed out invalid mappings. - */ - if (l3_pte == 0) - continue; - if ((pmap_load(l2) & PTE_RX) != 0) + if ((l2e = pmap_load(l2)) == 0) continue; + if ((l2e & PTE_RWX) != 0) { + if (sva + L2_SIZE == va_next && eva >= va_next) { + (void)pmap_remove_l2(pmap, l2, sva, + pmap_load(l1), &free, &lock); + continue; + } else if (!pmap_demote_l2_locked(pmap, l2, sva, + &lock)) { + /* + * The large page mapping was destroyed. 
+ */ + continue; + } + l2e = pmap_load(l2); + } /* * Limit our scan to either the end of the va represented @@ -1761,8 +2171,6 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) va = va_next; for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++, sva += L3_SIZE) { - if (l3 == NULL) - panic("l3 == NULL"); if (pmap_load(l3) == 0) { if (va != va_next) { pmap_invalidate_range(pmap, va, sva); @@ -1772,8 +2180,7 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) } if (va == va_next) va = sva; - if (pmap_remove_l3(pmap, l3, sva, l3_pte, &free, - &lock)) { + if (pmap_remove_l3(pmap, l3, sva, l2e, &free, &lock)) { sva += L3_SIZE; break; } @@ -1783,7 +2190,7 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) } if (lock != NULL) rw_wunlock(lock); - rw_runlock(&pvh_global_lock); + rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); vm_page_free_pages_toq(&free, false); } @@ -1804,42 +2211,54 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) void pmap_remove_all(vm_page_t m) { - pv_entry_t pv; - pmap_t pmap; - pt_entry_t *l3, tl3; - pd_entry_t *l2, tl2; struct spglist free; + struct md_page *pvh; + pmap_t pmap; + pt_entry_t *l3, l3e; + pd_entry_t *l2, l2e; + pv_entry_t pv; + vm_offset_t va; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_remove_all: page %p is not managed", m)); SLIST_INIT(&free); + pvh = (m->flags & PG_FICTITIOUS) != 0 ? 
&pv_dummy : + pa_to_pvh(VM_PAGE_TO_PHYS(m)); + rw_wlock(&pvh_global_lock); + while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) { + pmap = PV_PMAP(pv); + PMAP_LOCK(pmap); + va = pv->pv_va; + l2 = pmap_l2(pmap, va); + (void)pmap_demote_l2(pmap, l2, va); + PMAP_UNLOCK(pmap); + } while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pmap_resident_count_dec(pmap, 1); l2 = pmap_l2(pmap, pv->pv_va); KASSERT(l2 != NULL, ("pmap_remove_all: no l2 table found")); - tl2 = pmap_load(l2); + l2e = pmap_load(l2); - KASSERT((tl2 & PTE_RX) == 0, - ("pmap_remove_all: found a table when expecting " - "a block in %p's pv list", m)); + KASSERT((l2e & PTE_RX) == 0, + ("pmap_remove_all: found a superpage in %p's pv list", m)); l3 = pmap_l2_to_l3(l2, pv->pv_va); - tl3 = pmap_load_clear(l3); + l3e = pmap_load_clear(l3); pmap_invalidate_page(pmap, pv->pv_va); - if (tl3 & PTE_SW_WIRED) + if (l3e & PTE_SW_WIRED) pmap->pm_stats.wired_count--; - if ((tl3 & PTE_A) != 0) + if ((l3e & PTE_A) != 0) vm_page_aflag_set(m, PGA_REFERENCED); /* * Update the vm_page_t clean and reference bits. 
*/ - if ((tl3 & PTE_D) != 0) + if ((l3e & PTE_D) != 0) vm_page_dirty(m); - pmap_unuse_l3(pmap, pv->pv_va, pmap_load(l2), &free); + pmap_unuse_pt(pmap, pv->pv_va, pmap_load(l2), &free); TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; free_pv_entry(pmap, pv); @@ -1857,10 +2276,12 @@ pmap_remove_all(vm_page_t m) void pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) { - pd_entry_t *l1, *l2; + pd_entry_t *l1, *l2, l2e; pt_entry_t *l3, l3e, mask; vm_page_t m; - vm_offset_t va_next; + vm_paddr_t pa; + vm_offset_t va, va_next; + bool anychanged, pv_lists_locked; if ((prot & VM_PROT_READ) == VM_PROT_NONE) { pmap_remove(pmap, sva, eva); @@ -1871,12 +2292,14 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) (VM_PROT_WRITE | VM_PROT_EXECUTE)) return; + anychanged = false; + pv_lists_locked = false; mask = 0; if ((prot & VM_PROT_WRITE) == 0) mask |= PTE_W | PTE_D; if ((prot & VM_PROT_EXECUTE) == 0) mask |= PTE_X; - +resume: PMAP_LOCK(pmap); for (; sva < eva; sva = va_next) { l1 = pmap_l1(pmap, sva); @@ -1892,10 +2315,41 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) va_next = eva; l2 = pmap_l1_to_l2(l1, sva); - if (l2 == NULL || pmap_load(l2) == 0) - continue; - if ((pmap_load(l2) & PTE_RX) != 0) + if (l2 == NULL || (l2e = pmap_load(l2)) == 0) continue; + if ((l2e & PTE_RWX) != 0) { + if (sva + L2_SIZE == va_next && eva >= va_next) { +retryl2: + if ((l2e & (PTE_SW_MANAGED | PTE_D)) == + (PTE_SW_MANAGED | PTE_D)) { + pa = PTE_TO_PHYS(l2e); + for (va = sva, m = PHYS_TO_VM_PAGE(pa); + va < va_next; m++, va += PAGE_SIZE) + vm_page_dirty(m); + } + if (!atomic_fcmpset_long(l2, &l2e, l2e & ~mask)) + goto retryl2; + anychanged = true; + } else { + if (!pv_lists_locked) { + pv_lists_locked = true; + if (!rw_try_rlock(&pvh_global_lock)) { + if (anychanged) + pmap_invalidate_all( + pmap); + PMAP_UNLOCK(pmap); + rw_rlock(&pvh_global_lock); + goto resume; + } + } + if (!pmap_demote_l2(pmap, 
l2, sva)) { + /* + * The large page mapping was destroyed. + */ + continue; + } + } + } if (va_next > eva) va_next = eva; @@ -1903,7 +2357,7 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++, sva += L3_SIZE) { l3e = pmap_load(l3); -retry: +retryl3: if ((l3e & PTE_V) == 0) continue; if ((prot & VM_PROT_WRITE) == 0 && @@ -1913,59 +2367,235 @@ retry: vm_page_dirty(m); } if (!atomic_fcmpset_long(l3, &l3e, l3e & ~mask)) - goto retry; - /* XXX: Use pmap_invalidate_range */ - pmap_invalidate_page(pmap, sva); + goto retryl3; + anychanged = true; } } + if (anychanged) + pmap_invalidate_all(pmap); + if (pv_lists_locked) + rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); } int pmap_fault_fixup(pmap_t pmap, vm_offset_t va, vm_prot_t ftype) { - pt_entry_t orig_l3; - pt_entry_t new_l3; - pt_entry_t *l3; + pd_entry_t *l2, l2e; + pt_entry_t bits, *pte, oldpte; int rv; rv = 0; - PMAP_LOCK(pmap); - - l3 = pmap_l3(pmap, va); - if (l3 == NULL) - goto done; - - orig_l3 = pmap_load(l3); - if ((orig_l3 & PTE_V) == 0 || - (ftype == VM_PROT_WRITE && (orig_l3 & PTE_W) == 0) || - (ftype == VM_PROT_EXECUTE && (orig_l3 & PTE_X) == 0) || - (ftype == VM_PROT_READ && (orig_l3 & PTE_R) == 0)) - goto done; - - new_l3 = orig_l3 | PTE_A; - if (ftype == VM_PROT_WRITE) - new_l3 |= PTE_D; - - if (orig_l3 != new_l3) { - pmap_store(l3, new_l3); - pmap_invalidate_page(pmap, va); - rv = 1; + l2 = pmap_l2(pmap, va); + if (l2 == NULL || ((l2e = pmap_load(l2)) & PTE_V) == 0) goto done; + if ((l2e & PTE_RWX) == 0) { + pte = pmap_l2_to_l3(l2, va); + if (pte == NULL || ((oldpte = pmap_load(pte)) & PTE_V) == 0) + goto done; + } else { + pte = l2; + oldpte = l2e; } - /* - * XXX: This case should never happen since it means - * the PTE shouldn't have resulted in a fault.
- */ + if ((pmap != kernel_pmap && (oldpte & PTE_U) == 0) || + (ftype == VM_PROT_WRITE && (oldpte & PTE_W) == 0) || + (ftype == VM_PROT_EXECUTE && (oldpte & PTE_X) == 0) || + (ftype == VM_PROT_READ && (oldpte & PTE_R) == 0)) + goto done; + bits = PTE_A; + if (ftype == VM_PROT_WRITE) + bits |= PTE_D; + + /* + * Spurious faults can occur if the implementation caches invalid + * entries in the TLB, or if simultaneous accesses on multiple CPUs + * race with each other. + */ + if ((oldpte & bits) != bits) + pmap_store_bits(pte, bits); + sfence_vma(); + rv = 1; done: PMAP_UNLOCK(pmap); - return (rv); } +static bool +pmap_demote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va) +{ + struct rwlock *lock; + bool rv; + + lock = NULL; + rv = pmap_demote_l2_locked(pmap, l2, va, &lock); + if (lock != NULL) + rw_wunlock(lock); + return (rv); +} + +/* + * Tries to demote a 2MB page mapping. If demotion fails, the 2MB page + * mapping is invalidated. + */ +static bool +pmap_demote_l2_locked(pmap_t pmap, pd_entry_t *l2, vm_offset_t va, + struct rwlock **lockp) +{ + struct spglist free; + vm_page_t mpte; + pd_entry_t newl2, oldl2; + pt_entry_t *firstl3, newl3; + vm_paddr_t mptepa; + int i; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + + oldl2 = pmap_load(l2); + KASSERT((oldl2 & PTE_RWX) != 0, + ("pmap_demote_l2_locked: oldl2 is not a leaf entry")); + if ((oldl2 & PTE_A) == 0 || (mpte = pmap_remove_pt_page(pmap, va)) == + NULL) { + if ((oldl2 & PTE_A) == 0 || (mpte = vm_page_alloc(NULL, + pmap_l2_pindex(va), (VIRT_IN_DMAP(va) ? 
VM_ALLOC_INTERRUPT : + VM_ALLOC_NORMAL) | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == + NULL) { + SLIST_INIT(&free); + (void)pmap_remove_l2(pmap, l2, va & ~L2_OFFSET, + pmap_load(pmap_l1(pmap, va)), &free, lockp); + vm_page_free_pages_toq(&free, true); + CTR2(KTR_PMAP, "pmap_demote_l2_locked: " + "failure for va %#lx in pmap %p", va, pmap); + return (false); + } + if (va < VM_MAXUSER_ADDRESS) + pmap_resident_count_inc(pmap, 1); + } + mptepa = VM_PAGE_TO_PHYS(mpte); + firstl3 = (pt_entry_t *)PHYS_TO_DMAP(mptepa); + newl2 = ((mptepa / PAGE_SIZE) << PTE_PPN0_S) | PTE_V; + KASSERT((oldl2 & PTE_A) != 0, + ("pmap_demote_l2_locked: oldl2 is missing PTE_A")); + KASSERT((oldl2 & (PTE_D | PTE_W)) != PTE_W, + ("pmap_demote_l2_locked: oldl2 is missing PTE_D")); + newl3 = oldl2; + + /* + * If the page table page is new, initialize it. + */ + if (mpte->wire_count == 1) { + mpte->wire_count = Ln_ENTRIES; + for (i = 0; i < Ln_ENTRIES; i++) + pmap_store(firstl3 + i, newl3 + (i << PTE_PPN0_S)); + } + KASSERT(PTE_TO_PHYS(pmap_load(firstl3)) == PTE_TO_PHYS(newl3), + ("pmap_demote_l2_locked: firstl3 and newl3 map different physical " + "addresses")); + + /* + * If the mapping has changed attributes, update the page table + * entries. + */ + if ((pmap_load(firstl3) & PTE_PROMOTE) != (newl3 & PTE_PROMOTE)) + for (i = 0; i < Ln_ENTRIES; i++) + pmap_store(firstl3 + i, newl3 + (i << PTE_PPN0_S)); + + /* + * The spare PV entries must be reserved prior to demoting the + * mapping, that is, prior to changing the L2 entry. Otherwise, the + * state of the L2 entry and the PV lists will be inconsistent, which + * can result in reclaim_pv_chunk() attempting to remove a PV entry from + * the wrong PV list and pmap_pv_demote_l2() failing to find the + * expected PV entry for the 2MB page mapping that is being demoted. + */ + if ((oldl2 & PTE_SW_MANAGED) != 0) + reserve_pv_entries(pmap, Ln_ENTRIES - 1, lockp); + + /* + * Demote the mapping. + */ + pmap_store(l2, newl2); + + /* + * Demote the PV entry. 
+ */ + if ((oldl2 & PTE_SW_MANAGED) != 0) + pmap_pv_demote_l2(pmap, va, PTE_TO_PHYS(oldl2), lockp); + + atomic_add_long(&pmap_l2_demotions, 1); + CTR2(KTR_PMAP, "pmap_demote_l2_locked: success for va %#lx in pmap %p", + va, pmap); + return (true); +} + +#if VM_NRESERVLEVEL > 0 +static void +pmap_promote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va, + struct rwlock **lockp) +{ + pt_entry_t *firstl3, *l3; + vm_paddr_t pa; + vm_page_t ml3; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + + va &= ~L2_OFFSET; + KASSERT((pmap_load(l2) & PTE_RWX) == 0, + ("pmap_promote_l2: invalid l2 entry %p", l2)); + + firstl3 = (pt_entry_t *)PHYS_TO_DMAP(PTE_TO_PHYS(pmap_load(l2))); + pa = PTE_TO_PHYS(pmap_load(firstl3)); + if ((pa & L2_OFFSET) != 0) { + CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx pmap %p", + va, pmap); + atomic_add_long(&pmap_l2_p_failures, 1); + return; + } + + pa += PAGE_SIZE; + for (l3 = firstl3 + 1; l3 < firstl3 + Ln_ENTRIES; l3++) { + if (PTE_TO_PHYS(pmap_load(l3)) != pa) { + CTR2(KTR_PMAP, + "pmap_promote_l2: failure for va %#lx pmap %p", + va, pmap); + atomic_add_long(&pmap_l2_p_failures, 1); + return; + } + if ((pmap_load(l3) & PTE_PROMOTE) != + (pmap_load(firstl3) & PTE_PROMOTE)) { + CTR2(KTR_PMAP, + "pmap_promote_l2: failure for va %#lx pmap %p", + va, pmap); + atomic_add_long(&pmap_l2_p_failures, 1); + return; + } + pa += PAGE_SIZE; + } + + ml3 = PHYS_TO_VM_PAGE(PTE_TO_PHYS(pmap_load(l2))); + KASSERT(ml3->pindex == pmap_l2_pindex(va), + ("pmap_promote_l2: page table page's pindex is wrong")); + if (pmap_insert_pt_page(pmap, ml3)) { + CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx pmap %p", + va, pmap); + atomic_add_long(&pmap_l2_p_failures, 1); + return; + } + + if ((pmap_load(firstl3) & PTE_SW_MANAGED) != 0) + pmap_pv_promote_l2(pmap, va, PTE_TO_PHYS(pmap_load(firstl3)), + lockp); + + pmap_store(l2, pmap_load(firstl3)); + + atomic_add_long(&pmap_l2_promotions, 1); + CTR2(KTR_PMAP, "pmap_promote_l2: success for va %#lx in pmap %p", va, + pmap); +} 
+#endif + /* * Insert the given physical page (p) at * the specified virtual address (v) in the @@ -1980,20 +2610,19 @@ done: */ int pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, - u_int flags, int8_t psind __unused) + u_int flags, int8_t psind) { struct rwlock *lock; - pd_entry_t *l1, *l2; + pd_entry_t *l1, *l2, l2e; pt_entry_t new_l3, orig_l3; pt_entry_t *l3; pv_entry_t pv; vm_paddr_t opa, pa, l2_pa, l3_pa; vm_page_t mpte, om, l2_m, l3_m; - boolean_t nosleep; pt_entry_t entry; - pn_t l2_pn; - pn_t l3_pn; - pn_t pn; + pn_t l2_pn, l3_pn, pn; + int rv; + bool nosleep; va = trunc_page(va); if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m)) @@ -2008,7 +2637,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, new_l3 |= PTE_D; if (prot & VM_PROT_WRITE) new_l3 |= PTE_W; - if ((va >> 63) == 0) + if (va < VM_MAX_USER_ADDRESS) new_l3 |= PTE_U; new_l3 |= (pn << PTE_PPN0_S); @@ -2028,13 +2657,29 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, CTR2(KTR_PMAP, "pmap_enter: %.16lx -> %.16lx", va, pa); - mpte = NULL; - lock = NULL; + mpte = NULL; rw_rlock(&pvh_global_lock); PMAP_LOCK(pmap); + if (psind == 1) { + /* Assert the required virtual and physical alignment. */ + KASSERT((va & L2_OFFSET) == 0, + ("pmap_enter: va %#lx unaligned", va)); + KASSERT(m->psind > 0, ("pmap_enter: m->psind < psind")); + rv = pmap_enter_l2(pmap, va, new_l3, flags, m, &lock); + goto out; + } - if (va < VM_MAXUSER_ADDRESS) { + l2 = pmap_l2(pmap, va); + if (l2 != NULL && ((l2e = pmap_load(l2)) & PTE_V) != 0 && + ((l2e & PTE_RWX) == 0 || pmap_demote_l2_locked(pmap, l2, + va, &lock))) { + l3 = pmap_l2_to_l3(l2, va); + if (va < VM_MAXUSER_ADDRESS) { + mpte = PHYS_TO_VM_PAGE(PTE_TO_PHYS(pmap_load(l2))); + mpte->wire_count++; + } + } else if (va < VM_MAXUSER_ADDRESS) { nosleep = (flags & PMAP_ENTER_NOSLEEP) != 0; mpte = pmap_alloc_l3(pmap, va, nosleep ? 
NULL : &lock); if (mpte == NULL && nosleep) { @@ -2050,7 +2695,6 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, l3 = pmap_l3(pmap, va); /* TODO: This is not optimal, but should mostly work */ if (l3 == NULL) { - l2 = pmap_l2(pmap, va); if (l2 == NULL) { l2_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | @@ -2071,9 +2715,6 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, l2 = pmap_l1_to_l2(l1, va); } - KASSERT(l2 != NULL, - ("No l2 table after allocating one")); - l3_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (l3_m == NULL) @@ -2162,6 +2803,8 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_aflag_set(om, PGA_REFERENCED); CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, opa); pv = pmap_pvh_remove(&om->md, pmap, va); + KASSERT(pv != NULL, + ("pmap_enter: no PV entry for %#lx", va)); if ((new_l3 & PTE_SW_MANAGED) == 0) free_pv_entry(pmap, pv); if ((om->aflags & PGA_WRITEABLE) != 0 && @@ -2216,10 +2859,163 @@ validate: pmap_store(l3, new_l3); } +#if VM_NRESERVLEVEL > 0 + if (mpte != NULL && mpte->wire_count == Ln_ENTRIES && + pmap_ps_enabled(pmap) && + (m->flags & PG_FICTITIOUS) == 0 && + vm_reserv_level_iffullpop(m) == 0) + pmap_promote_l2(pmap, l2, va, &lock); +#endif + + rv = KERN_SUCCESS; +out: if (lock != NULL) rw_wunlock(lock); rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); + return (rv); +} + +/* + * Tries to create a read- and/or execute-only 2MB page mapping. Returns true + * if successful. Returns false if (1) a page table page cannot be allocated + * without sleeping, (2) a mapping already exists at the specified virtual + * address, or (3) a PV entry cannot be allocated without reclaiming another + * PV entry. 
+ */ +static bool +pmap_enter_2mpage(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, + struct rwlock **lockp) +{ + pd_entry_t new_l2; + pn_t pn; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + + pn = VM_PAGE_TO_PHYS(m) / PAGE_SIZE; + new_l2 = (pd_entry_t)((pn << PTE_PPN0_S) | PTE_R | PTE_V); + if ((m->oflags & VPO_UNMANAGED) == 0) + new_l2 |= PTE_SW_MANAGED; + if ((prot & VM_PROT_EXECUTE) != 0) + new_l2 |= PTE_X; + if (va < VM_MAXUSER_ADDRESS) + new_l2 |= PTE_U; + return (pmap_enter_l2(pmap, va, new_l2, PMAP_ENTER_NOSLEEP | + PMAP_ENTER_NOREPLACE | PMAP_ENTER_NORECLAIM, NULL, lockp) == + KERN_SUCCESS); +} + +/* + * Tries to create the specified 2MB page mapping. Returns KERN_SUCCESS if + * the mapping was created, and either KERN_FAILURE or KERN_RESOURCE_SHORTAGE + * otherwise. Returns KERN_FAILURE if PMAP_ENTER_NOREPLACE was specified and + * a mapping already exists at the specified virtual address. Returns + * KERN_RESOURCE_SHORTAGE if PMAP_ENTER_NOSLEEP was specified and a page table + * page allocation failed. Returns KERN_RESOURCE_SHORTAGE if + * PMAP_ENTER_NORECLAIM was specified and a PV entry allocation failed. + * + * The parameter "m" is only used when creating a managed, writeable mapping. + */ +static int +pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t new_l2, u_int flags, + vm_page_t m, struct rwlock **lockp) +{ + struct spglist free; + pd_entry_t *l2, *l3, oldl2; + vm_offset_t sva; + vm_page_t l2pg, mt; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + + if ((l2pg = pmap_alloc_l2(pmap, va, (flags & PMAP_ENTER_NOSLEEP) != 0 ? 
+ NULL : lockp)) == NULL) { + CTR2(KTR_PMAP, "pmap_enter_l2: failure for va %#lx in pmap %p", + va, pmap); + return (KERN_RESOURCE_SHORTAGE); + } + + l2 = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(l2pg)); + l2 = &l2[pmap_l2_index(va)]; + if ((oldl2 = pmap_load(l2)) != 0) { + KASSERT(l2pg->wire_count > 1, + ("pmap_enter_l2: l2pg's wire count is too low")); + if ((flags & PMAP_ENTER_NOREPLACE) != 0) { + l2pg->wire_count--; + CTR2(KTR_PMAP, + "pmap_enter_l2: failure for va %#lx in pmap %p", + va, pmap); + return (KERN_FAILURE); + } + SLIST_INIT(&free); + if ((oldl2 & PTE_RWX) != 0) + (void)pmap_remove_l2(pmap, l2, va, + pmap_load(pmap_l1(pmap, va)), &free, lockp); + else + for (sva = va; sva < va + L2_SIZE; sva += PAGE_SIZE) { + l3 = pmap_l2_to_l3(l2, sva); + if ((pmap_load(l3) & PTE_V) != 0 && + pmap_remove_l3(pmap, l3, sva, oldl2, &free, + lockp) != 0) + break; + } + vm_page_free_pages_toq(&free, true); + if (va >= VM_MAXUSER_ADDRESS) { + mt = PHYS_TO_VM_PAGE(PTE_TO_PHYS(pmap_load(l2))); + if (pmap_insert_pt_page(pmap, mt)) { + /* + * XXX Currently, this can't happen because + * we do not perform pmap_enter(psind == 1) + * on the kernel pmap. + */ + panic("pmap_enter_l2: trie insert failed"); + } + } else + KASSERT(pmap_load(l2) == 0, + ("pmap_enter_l2: non-zero L2 entry %p", l2)); + } + + if ((new_l2 & PTE_SW_MANAGED) != 0) { + /* + * Abort this mapping if its PV entry could not be created. + */ + if (!pmap_pv_insert_l2(pmap, va, new_l2, flags, lockp)) { + SLIST_INIT(&free); + if (pmap_unwire_ptp(pmap, va, l2pg, &free)) { + /* + * Although "va" is not mapped, paging-structure + * caches could nonetheless have entries that + * refer to the freed page table pages. + * Invalidate those entries. 
+ */ + pmap_invalidate_page(pmap, va); + vm_page_free_pages_toq(&free, true); + } + CTR2(KTR_PMAP, + "pmap_enter_l2: failure for va %#lx in pmap %p", + va, pmap); + return (KERN_RESOURCE_SHORTAGE); + } + if ((new_l2 & PTE_W) != 0) + for (mt = m; mt < &m[L2_SIZE / PAGE_SIZE]; mt++) + vm_page_aflag_set(mt, PGA_WRITEABLE); + } + + /* + * Increment counters. + */ + if ((new_l2 & PTE_SW_WIRED) != 0) + pmap->pm_stats.wired_count += L2_SIZE / PAGE_SIZE; + pmap->pm_stats.resident_count += L2_SIZE / PAGE_SIZE; + + /* + * Map the superpage. + */ + pmap_store(l2, new_l2); + + atomic_add_long(&pmap_l2_mappings, 1); + CTR2(KTR_PMAP, "pmap_enter_l2: success for va %#lx in pmap %p", + va, pmap); + return (KERN_SUCCESS); } @@ -2254,7 +3050,13 @@ pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, PMAP_LOCK(pmap); while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { va = start + ptoa(diff); - mpte = pmap_enter_quick_locked(pmap, va, m, prot, mpte, &lock); + if ((va & L2_OFFSET) == 0 && va + L2_SIZE <= end && + m->psind == 1 && pmap_ps_enabled(pmap) && + pmap_enter_2mpage(pmap, va, m, prot, &lock)) + m = &m[L2_SIZE / PAGE_SIZE - 1]; + else + mpte = pmap_enter_quick_locked(pmap, va, m, prot, mpte, + &lock); m = TAILQ_NEXT(m, listq); } if (lock != NULL) @@ -2365,7 +3167,7 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, !pmap_try_insert_pv_entry(pmap, va, m, lockp)) { if (mpte != NULL) { SLIST_INIT(&free); - if (pmap_unwire_l3(pmap, va, mpte, &free)) { + if (pmap_unwire_ptp(pmap, va, mpte, &free)) { pmap_invalidate_page(pmap, va); vm_page_free_pages_toq(&free, false); } @@ -2429,11 +3231,12 @@ void pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t va_next; - pd_entry_t *l1, *l2; - pt_entry_t *l3; - boolean_t pv_lists_locked; + pd_entry_t *l1, *l2, l2e; + pt_entry_t *l3, l3e; + bool pv_lists_locked; - pv_lists_locked = FALSE; + pv_lists_locked = false; +retry: PMAP_LOCK(pmap); for (; sva < eva; sva = va_next) { l1 
= pmap_l1(pmap, sva); @@ -2449,25 +3252,46 @@ pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) va_next = eva; l2 = pmap_l1_to_l2(l1, sva); - if (pmap_load(l2) == 0) + if ((l2e = pmap_load(l2)) == 0) continue; + if ((l2e & PTE_RWX) != 0) { + if (sva + L2_SIZE == va_next && eva >= va_next) { + if ((l2e & PTE_SW_WIRED) == 0) + panic("pmap_unwire: l2 %#jx is missing " + "PTE_SW_WIRED", (uintmax_t)l2e); + pmap_clear_bits(l2, PTE_SW_WIRED); + continue; + } else { + if (!pv_lists_locked) { + pv_lists_locked = true; + if (!rw_try_rlock(&pvh_global_lock)) { + PMAP_UNLOCK(pmap); + rw_rlock(&pvh_global_lock); + /* Repeat sva. */ + goto retry; + } + } + if (!pmap_demote_l2(pmap, l2, sva)) + panic("pmap_unwire: demotion failed"); + } + } if (va_next > eva) va_next = eva; for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++, sva += L3_SIZE) { - if (pmap_load(l3) == 0) + if ((l3e = pmap_load(l3)) == 0) continue; - if ((pmap_load(l3) & PTE_SW_WIRED) == 0) + if ((l3e & PTE_SW_WIRED) == 0) panic("pmap_unwire: l3 %#jx is missing " - "PTE_SW_WIRED", (uintmax_t)*l3); + "PTE_SW_WIRED", (uintmax_t)l3e); /* * PG_W must be cleared atomically. Although the pmap * lock synchronizes access to PG_W, another processor * could be setting PG_M and/or PG_A concurrently. 
*/ - atomic_clear_long(l3, PTE_SW_WIRED); + pmap_clear_bits(l3, PTE_SW_WIRED); pmap->pm_stats.wired_count--; } } @@ -2595,6 +3419,7 @@ pmap_quick_remove_page(vm_offset_t addr) boolean_t pmap_page_exists_quick(pmap_t pmap, vm_page_t m) { + struct md_page *pvh; struct rwlock *lock; pv_entry_t pv; int loops = 0; @@ -2615,6 +3440,18 @@ pmap_page_exists_quick(pmap_t pmap, vm_page_t m) if (loops >= 16) break; } + if (!rv && loops < 16 && (m->flags & PG_FICTITIOUS) == 0) { + pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); + TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { + if (PV_PMAP(pv) == pmap) { + rv = TRUE; + break; + } + loops++; + if (loops >= 16) + break; + } + } rw_runlock(lock); rw_runlock(&pvh_global_lock); return (rv); @@ -2629,11 +3466,13 @@ pmap_page_exists_quick(pmap_t pmap, vm_page_t m) int pmap_page_wired_mappings(vm_page_t m) { + struct md_page *pvh; struct rwlock *lock; pmap_t pmap; + pd_entry_t *l2; pt_entry_t *l3; pv_entry_t pv; - int count, md_gen; + int count, md_gen, pvh_gen; if ((m->oflags & VPO_UNMANAGED) != 0) return (0); @@ -2659,11 +3498,72 @@ restart: count++; PMAP_UNLOCK(pmap); } + if ((m->flags & PG_FICTITIOUS) == 0) { + pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); + TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { + pmap = PV_PMAP(pv); + if (!PMAP_TRYLOCK(pmap)) { + md_gen = m->md.pv_gen; + pvh_gen = pvh->pv_gen; + rw_runlock(lock); + PMAP_LOCK(pmap); + rw_rlock(lock); + if (md_gen != m->md.pv_gen || + pvh_gen != pvh->pv_gen) { + PMAP_UNLOCK(pmap); + goto restart; + } + } + l2 = pmap_l2(pmap, pv->pv_va); + if ((pmap_load(l2) & PTE_SW_WIRED) != 0) + count++; + PMAP_UNLOCK(pmap); + } + } rw_runlock(lock); rw_runlock(&pvh_global_lock); return (count); } +static void +pmap_remove_pages_pv(pmap_t pmap, vm_page_t m, pv_entry_t pv, + struct spglist *free, bool superpage) +{ + struct md_page *pvh; + vm_page_t mpte, mt; + + if (superpage) { + pmap_resident_count_dec(pmap, Ln_ENTRIES); + pvh = pa_to_pvh(m->phys_addr); + TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); + 
pvh->pv_gen++; + if (TAILQ_EMPTY(&pvh->pv_list)) { + for (mt = m; mt < &m[Ln_ENTRIES]; mt++) + if (TAILQ_EMPTY(&mt->md.pv_list) && + (mt->aflags & PGA_WRITEABLE) != 0) + vm_page_aflag_clear(mt, PGA_WRITEABLE); + } + mpte = pmap_remove_pt_page(pmap, pv->pv_va); + if (mpte != NULL) { + pmap_resident_count_dec(pmap, 1); + KASSERT(mpte->wire_count == Ln_ENTRIES, + ("pmap_remove_pages: pte page wire count error")); + mpte->wire_count = 0; + pmap_add_delayed_free_list(mpte, free, FALSE); + } + } else { + pmap_resident_count_dec(pmap, 1); + TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); + m->md.pv_gen++; + if (TAILQ_EMPTY(&m->md.pv_list) && + (m->aflags & PGA_WRITEABLE) != 0) { + pvh = pa_to_pvh(m->phys_addr); + if (TAILQ_EMPTY(&pvh->pv_list)) + vm_page_aflag_clear(m, PGA_WRITEABLE); + } + } +} + /* * Destroy all managed, non-wired mappings in the given user-space * pmap. This pmap cannot be active on any processor besides the @@ -2683,17 +3583,17 @@ restart: void pmap_remove_pages(pmap_t pmap) { - pd_entry_t ptepde, *l2; - pt_entry_t *l3, tl3; struct spglist free; - vm_page_t m; + pd_entry_t ptepde; + pt_entry_t *pte, tpte; + vm_page_t m, mt; pv_entry_t pv; struct pv_chunk *pc, *npc; struct rwlock *lock; int64_t bit; uint64_t inuse, bitmask; int allfree, field, freed, idx; - vm_paddr_t pa; + bool superpage; lock = NULL; @@ -2712,53 +3612,57 @@ pmap_remove_pages(pmap_t pmap) pv = &pc->pc_pventry[idx]; inuse &= ~bitmask; - l2 = pmap_l2(pmap, pv->pv_va); - ptepde = pmap_load(l2); - l3 = pmap_l2_to_l3(l2, pv->pv_va); - tl3 = pmap_load(l3); + pte = pmap_l1(pmap, pv->pv_va); + ptepde = pmap_load(pte); + pte = pmap_l1_to_l2(pte, pv->pv_va); + tpte = pmap_load(pte); + if ((tpte & PTE_RWX) != 0) { + superpage = true; + } else { + ptepde = tpte; + pte = pmap_l2_to_l3(pte, pv->pv_va); + tpte = pmap_load(pte); + superpage = false; + } /* * We cannot remove wired pages from a * process' mapping at this time. 
*/ - if (tl3 & PTE_SW_WIRED) { + if (tpte & PTE_SW_WIRED) { allfree = 0; continue; } - pa = PTE_TO_PHYS(tl3); - m = PHYS_TO_VM_PAGE(pa); - KASSERT(m->phys_addr == pa, - ("vm_page_t %p phys_addr mismatch %016jx %016jx", - m, (uintmax_t)m->phys_addr, - (uintmax_t)tl3)); - + m = PHYS_TO_VM_PAGE(PTE_TO_PHYS(tpte)); KASSERT((m->flags & PG_FICTITIOUS) != 0 || m < &vm_page_array[vm_page_array_size], - ("pmap_remove_pages: bad l3 %#jx", - (uintmax_t)tl3)); + ("pmap_remove_pages: bad pte %#jx", + (uintmax_t)tpte)); - pmap_clear(l3); + pmap_clear(pte); /* * Update the vm_page_t clean/reference bits. */ - if ((tl3 & PTE_D) != 0) - vm_page_dirty(m); + if ((tpte & (PTE_D | PTE_W)) == + (PTE_D | PTE_W)) { + if (superpage) + for (mt = m; + mt < &m[Ln_ENTRIES]; mt++) + vm_page_dirty(mt); + else + vm_page_dirty(m); + } CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m); /* Mark free */ pc->pc_map[field] |= bitmask; - pmap_resident_count_dec(pmap, 1); - TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); - m->md.pv_gen++; - if (TAILQ_EMPTY(&m->md.pv_list) && - (m->aflags & PGA_WRITEABLE) != 0) - vm_page_aflag_clear(m, PGA_WRITEABLE); - - pmap_unuse_l3(pmap, pv->pv_va, ptepde, &free); + pmap_remove_pages_pv(pmap, m, pv, &free, + superpage); + pmap_unuse_pt(pmap, pv->pv_va, ptepde, &free); freed++; } } @@ -2778,20 +3682,23 @@ pmap_remove_pages(pmap_t pmap) vm_page_free_pages_toq(&free, false); } -/* - * This is used to check if a page has been accessed or modified. As we - * don't have a bit to see if it has been modified we have to assume it - * has been if the page is read/write. 
- */ -static boolean_t +static bool pmap_page_test_mappings(vm_page_t m, boolean_t accessed, boolean_t modified) { + struct md_page *pvh; struct rwlock *lock; + pd_entry_t *l2; + pt_entry_t *l3, mask; pv_entry_t pv; - pt_entry_t *l3, mask, value; pmap_t pmap; - int md_gen; - boolean_t rv; + int md_gen, pvh_gen; + bool rv; + + mask = 0; + if (modified) + mask |= PTE_D; + if (accessed) + mask |= PTE_A; rv = FALSE; rw_rlock(&pvh_global_lock); @@ -2811,33 +3718,34 @@ restart: } } l3 = pmap_l3(pmap, pv->pv_va); - mask = 0; - value = 0; - if (modified) { - mask |= PTE_D; - value |= PTE_D; - } - if (accessed) { - mask |= PTE_A; - value |= PTE_A; - } - -#if 0 - if (modified) { - mask |= ATTR_AP_RW_BIT; - value |= ATTR_AP(ATTR_AP_RW); - } - if (accessed) { - mask |= ATTR_AF | ATTR_DESCR_MASK; - value |= ATTR_AF | L3_PAGE; - } -#endif - - rv = (pmap_load(l3) & mask) == value; + rv = (pmap_load(l3) & mask) == mask; PMAP_UNLOCK(pmap); if (rv) goto out; } + if ((m->flags & PG_FICTITIOUS) == 0) { + pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); + TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { + pmap = PV_PMAP(pv); + if (!PMAP_TRYLOCK(pmap)) { + md_gen = m->md.pv_gen; + pvh_gen = pvh->pv_gen; + rw_runlock(lock); + PMAP_LOCK(pmap); + rw_rlock(lock); + if (md_gen != m->md.pv_gen || + pvh_gen != pvh->pv_gen) { + PMAP_UNLOCK(pmap); + goto restart; + } + } + l2 = pmap_l2(pmap, pv->pv_va); + rv = (pmap_load(l2) & mask) == mask; + PMAP_UNLOCK(pmap); + if (rv) + goto out; + } + } out: rw_runlock(lock); rw_runlock(&pvh_global_lock); @@ -2911,12 +3819,14 @@ pmap_is_referenced(vm_page_t m) void pmap_remove_write(vm_page_t m) { - pmap_t pmap; + struct md_page *pvh; struct rwlock *lock; - pv_entry_t pv; - pt_entry_t *l3, oldl3; - pt_entry_t newl3; - int md_gen; + pmap_t pmap; + pd_entry_t *l2; + pt_entry_t *l3, oldl3, newl3; + pv_entry_t next_pv, pv; + vm_offset_t va; + int md_gen, pvh_gen; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_remove_write: page %p is not managed", m)); @@ -2929,18 +3839,43 @@ 
pmap_remove_write(vm_page_t m) VM_OBJECT_ASSERT_WLOCKED(m->object); if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return; - rw_rlock(&pvh_global_lock); lock = VM_PAGE_TO_PV_LIST_LOCK(m); + pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : + pa_to_pvh(VM_PAGE_TO_PHYS(m)); + rw_rlock(&pvh_global_lock); retry_pv_loop: rw_wlock(lock); + TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { + pmap = PV_PMAP(pv); + if (!PMAP_TRYLOCK(pmap)) { + pvh_gen = pvh->pv_gen; + rw_wunlock(lock); + PMAP_LOCK(pmap); + rw_wlock(lock); + if (pvh_gen != pvh->pv_gen) { + PMAP_UNLOCK(pmap); + rw_wunlock(lock); + goto retry_pv_loop; + } + } + va = pv->pv_va; + l2 = pmap_l2(pmap, va); + if ((pmap_load(l2) & PTE_W) != 0) + (void)pmap_demote_l2_locked(pmap, l2, va, &lock); + KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m), + ("inconsistent pv lock %p %p for page %p", + lock, VM_PAGE_TO_PV_LIST_LOCK(m), m)); + PMAP_UNLOCK(pmap); + } TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { + pvh_gen = pvh->pv_gen; md_gen = m->md.pv_gen; rw_wunlock(lock); PMAP_LOCK(pmap); rw_wlock(lock); - if (md_gen != m->md.pv_gen) { + if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); rw_wunlock(lock); goto retry_pv_loop; @@ -2964,13 +3899,6 @@ retry: rw_runlock(&pvh_global_lock); } -static __inline boolean_t -safe_to_clear_referenced(pmap_t pmap, pt_entry_t pte) -{ - - return (FALSE); -} - /* * pmap_ts_referenced: * @@ -2990,38 +3918,104 @@ safe_to_clear_referenced(pmap_t pmap, pt_entry_t pte) int pmap_ts_referenced(vm_page_t m) { + struct spglist free; + struct md_page *pvh; + struct rwlock *lock; pv_entry_t pv, pvf; pmap_t pmap; - struct rwlock *lock; - pd_entry_t *l2; - pt_entry_t *l3, old_l3; + pd_entry_t *l2, l2e; + pt_entry_t *l3, l3e; vm_paddr_t pa; - int cleared, md_gen, not_cleared; - struct spglist free; + vm_offset_t va; + int md_gen, pvh_gen, ret; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_ts_referenced: page %p is 
not managed", m)); SLIST_INIT(&free); - cleared = 0; + ret = 0; pa = VM_PAGE_TO_PHYS(m); + pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : pa_to_pvh(pa); + lock = PHYS_TO_PV_LIST_LOCK(pa); rw_rlock(&pvh_global_lock); rw_wlock(lock); retry: - not_cleared = 0; + if ((pvf = TAILQ_FIRST(&pvh->pv_list)) == NULL) + goto small_mappings; + pv = pvf; + do { + pmap = PV_PMAP(pv); + if (!PMAP_TRYLOCK(pmap)) { + pvh_gen = pvh->pv_gen; + rw_wunlock(lock); + PMAP_LOCK(pmap); + rw_wlock(lock); + if (pvh_gen != pvh->pv_gen) { + PMAP_UNLOCK(pmap); + goto retry; + } + } + va = pv->pv_va; + l2 = pmap_l2(pmap, va); + l2e = pmap_load(l2); + if ((l2e & (PTE_W | PTE_D)) == (PTE_W | PTE_D)) { + /* + * Although l2e is mapping a 2MB page, because + * this function is called at a 4KB page granularity, + * we only update the 4KB page under test. + */ + vm_page_dirty(m); + } + if ((l2e & PTE_A) != 0) { + /* + * Since this reference bit is shared by 512 4KB + * pages, it should not be cleared every time it is + * tested. Apply a simple "hash" function on the + * physical page number, the virtual superpage number, + * and the pmap address to select one 4KB page out of + * the 512 on which testing the reference bit will + * result in clearing that reference bit. This + * function is designed to avoid the selection of the + * same 4KB page for every 2MB page mapping. + * + * On demotion, a mapping that hasn't been referenced + * is simply destroyed. To avoid the possibility of a + * subsequent page fault on a demoted wired mapping, + * always leave its reference bit set. Moreover, + * since the superpage is wired, the current state of + * its reference bit won't affect page replacement. + */ + if ((((pa >> PAGE_SHIFT) ^ (pv->pv_va >> L2_SHIFT) ^ + (uintptr_t)pmap) & (Ln_ENTRIES - 1)) == 0 && + (l2e & PTE_SW_WIRED) == 0) { + pmap_clear_bits(l2, PTE_A); + pmap_invalidate_page(pmap, va); + } + ret++; + } + PMAP_UNLOCK(pmap); + /* Rotate the PV list if it has more than one entry. 
*/ + if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) { + TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); + TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); + pvh->pv_gen++; + } + if (ret >= PMAP_TS_REFERENCED_MAX) + goto out; + } while ((pv = TAILQ_FIRST(&pvh->pv_list)) != pvf); +small_mappings: if ((pvf = TAILQ_FIRST(&m->md.pv_list)) == NULL) goto out; pv = pvf; do { - if (pvf == NULL) - pvf = pv; pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { + pvh_gen = pvh->pv_gen; md_gen = m->md.pv_gen; rw_wunlock(lock); PMAP_LOCK(pmap); rw_wlock(lock); - if (md_gen != m->md.pv_gen) { + if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); goto retry; } @@ -3032,36 +4026,21 @@ retry: ("pmap_ts_referenced: found an invalid l2 table")); l3 = pmap_l2_to_l3(l2, pv->pv_va); - old_l3 = pmap_load(l3); - if ((old_l3 & PTE_D) != 0) + l3e = pmap_load(l3); + if ((l3e & PTE_D) != 0) vm_page_dirty(m); - if ((old_l3 & PTE_A) != 0) { - if (safe_to_clear_referenced(pmap, old_l3)) { - /* - * TODO: We don't handle the access flag - * at all. We need to be able to set it in - * the exception handler. - */ - panic("RISCVTODO: safe_to_clear_referenced\n"); - } else if ((old_l3 & PTE_SW_WIRED) == 0) { + if ((l3e & PTE_A) != 0) { + if ((l3e & PTE_SW_WIRED) == 0) { /* * Wired pages cannot be paged out so * doing accessed bit emulation for * them is wasted effort. We do the * hard work for unwired pages only. */ - pmap_remove_l3(pmap, l3, pv->pv_va, - pmap_load(l2), &free, &lock); + pmap_clear_bits(l3, PTE_A); pmap_invalidate_page(pmap, pv->pv_va); - cleared++; - if (pvf == pv) - pvf = NULL; - pv = NULL; - KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m), - ("inconsistent pv lock %p %p for page %p", - lock, VM_PAGE_TO_PV_LIST_LOCK(m), m)); - } else - not_cleared++; + } + ret++; } PMAP_UNLOCK(pmap); /* Rotate the PV list if it has more than one entry. 
*/ @@ -3070,13 +4049,13 @@ retry: TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; } - } while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && cleared + - not_cleared < PMAP_TS_REFERENCED_MAX); + } while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && ret < + PMAP_TS_REFERENCED_MAX); out: rw_wunlock(lock); rw_runlock(&pvh_global_lock); vm_page_free_pages_toq(&free, false); - return (cleared + not_cleared); + return (ret); } /* @@ -3163,7 +4142,7 @@ retry: l2 = pmap_l2(pmap, addr); if (l2 != NULL && ((tpte = pmap_load(l2)) & PTE_V) != 0) { - if ((tpte & (PTE_R | PTE_W | PTE_X)) != 0) { + if ((tpte & PTE_RWX) != 0) { pa = PTE_TO_PHYS(tpte) | (addr & L2_OFFSET); val = MINCORE_INCORE | MINCORE_SUPER; } else { @@ -3241,6 +4220,20 @@ void pmap_align_superpage(vm_object_t object, vm_ooffset_t offset, vm_offset_t *addr, vm_size_t size) { + vm_offset_t superpage_offset; + + if (size < L2_SIZE) + return; + if (object != NULL && (object->flags & OBJ_COLORED) != 0) + offset += ptoa(object->pg_color); + superpage_offset = offset & L2_OFFSET; + if (size - ((L2_SIZE - superpage_offset) & L2_OFFSET) < L2_SIZE || + (*addr & L2_OFFSET) == superpage_offset) + return; + if ((*addr & L2_OFFSET) < superpage_offset) + *addr = (*addr & ~L2_OFFSET) + superpage_offset; + else + *addr = ((*addr + L2_OFFSET) & ~L2_OFFSET) + superpage_offset; } /** diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c index e095ccc69ed..4bf8869b997 100644 --- a/sys/vm/vm_fault.c +++ b/sys/vm/vm_fault.c @@ -271,7 +271,8 @@ vm_fault_soft_fast(struct faultstate *fs, vm_offset_t vaddr, vm_prot_t prot, { vm_page_t m, m_map; #if (defined(__aarch64__) || defined(__amd64__) || (defined(__arm__) && \ - __ARM_ARCH >= 6) || defined(__i386__)) && VM_NRESERVLEVEL > 0 + __ARM_ARCH >= 6) || defined(__i386__) || defined(__riscv)) && \ + VM_NRESERVLEVEL > 0 vm_page_t m_super; int flags; #endif @@ -286,7 +287,8 @@ vm_fault_soft_fast(struct faultstate *fs, vm_offset_t vaddr, vm_prot_t prot, m_map = m; psind = 0; #if 
(defined(__aarch64__) || defined(__amd64__) || (defined(__arm__) && \ - __ARM_ARCH >= 6) || defined(__i386__)) && VM_NRESERVLEVEL > 0 + __ARM_ARCH >= 6) || defined(__i386__) || defined(__riscv)) && \ + VM_NRESERVLEVEL > 0 if ((m->flags & PG_FICTITIOUS) == 0 && (m_super = vm_reserv_to_superpage(m)) != NULL && rounddown2(vaddr, pagesizes[m_super->psind]) >= fs->entry->start && @@ -463,7 +465,7 @@ vm_fault_populate(struct faultstate *fs, vm_prot_t prot, int fault_type, pidx += npages, m = vm_page_next(&m[npages - 1])) { vaddr = fs->entry->start + IDX_TO_OFF(pidx) - fs->entry->offset; #if defined(__aarch64__) || defined(__amd64__) || (defined(__arm__) && \ - __ARM_ARCH >= 6) || defined(__i386__) + __ARM_ARCH >= 6) || defined(__i386__) || defined(__riscv) psind = m->psind; if (psind > 0 && ((vaddr & (pagesizes[psind] - 1)) != 0 || pidx + OFF_TO_IDX(pagesizes[psind]) - 1 > pager_last || From 91c85dd88b978ea956824fd4f49c14f0a1a59c8f Mon Sep 17 00:00:00 2001 From: Mark Johnston Date: Wed, 13 Feb 2019 17:38:47 +0000 Subject: [PATCH 69/93] Implement pmap_clear_modify() for RISC-V. 
Reviewed by: kib Sponsored by: The FreeBSD Foundation Differential Revision: https://reviews.freebsd.org/D18875 --- sys/riscv/riscv/pmap.c | 82 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 80 insertions(+), 2 deletions(-) diff --git a/sys/riscv/riscv/pmap.c b/sys/riscv/riscv/pmap.c index 3929fe1b3e5..18400128790 100644 --- a/sys/riscv/riscv/pmap.c +++ b/sys/riscv/riscv/pmap.c @@ -4074,6 +4074,14 @@ pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice) void pmap_clear_modify(vm_page_t m) { + struct md_page *pvh; + struct rwlock *lock; + pmap_t pmap; + pv_entry_t next_pv, pv; + pd_entry_t *l2, oldl2; + pt_entry_t *l3, oldl3; + vm_offset_t va; + int md_gen, pvh_gen; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_clear_modify: page %p is not managed", m)); @@ -4088,8 +4096,78 @@ pmap_clear_modify(vm_page_t m) */ if ((m->aflags & PGA_WRITEABLE) == 0) return; - - /* RISCVTODO: We lack support for tracking if a page is modified */ + pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : + pa_to_pvh(VM_PAGE_TO_PHYS(m)); + lock = VM_PAGE_TO_PV_LIST_LOCK(m); + rw_rlock(&pvh_global_lock); + rw_wlock(lock); +restart: + TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { + pmap = PV_PMAP(pv); + if (!PMAP_TRYLOCK(pmap)) { + pvh_gen = pvh->pv_gen; + rw_wunlock(lock); + PMAP_LOCK(pmap); + rw_wlock(lock); + if (pvh_gen != pvh->pv_gen) { + PMAP_UNLOCK(pmap); + goto restart; + } + } + va = pv->pv_va; + l2 = pmap_l2(pmap, va); + oldl2 = pmap_load(l2); + if ((oldl2 & PTE_W) != 0) { + if (pmap_demote_l2_locked(pmap, l2, va, &lock)) { + if ((oldl2 & PTE_SW_WIRED) == 0) { + /* + * Write protect the mapping to a + * single page so that a subsequent + * write access may repromote. 
+ */ + va += VM_PAGE_TO_PHYS(m) - + PTE_TO_PHYS(oldl2); + l3 = pmap_l2_to_l3(l2, va); + oldl3 = pmap_load(l3); + if ((oldl3 & PTE_V) != 0) { + while (!atomic_fcmpset_long(l3, + &oldl3, oldl3 & ~(PTE_D | + PTE_W))) + cpu_spinwait(); + vm_page_dirty(m); + pmap_invalidate_page(pmap, va); + } + } + } + } + PMAP_UNLOCK(pmap); + } + TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { + pmap = PV_PMAP(pv); + if (!PMAP_TRYLOCK(pmap)) { + md_gen = m->md.pv_gen; + pvh_gen = pvh->pv_gen; + rw_wunlock(lock); + PMAP_LOCK(pmap); + rw_wlock(lock); + if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) { + PMAP_UNLOCK(pmap); + goto restart; + } + } + l2 = pmap_l2(pmap, pv->pv_va); + KASSERT((pmap_load(l2) & PTE_RWX) == 0, + ("pmap_clear_modify: found a 2mpage in page %p's pv list", + m)); + l3 = pmap_l2_to_l3(l2, pv->pv_va); + if ((pmap_load(l3) & (PTE_D | PTE_W)) == (PTE_D | PTE_W)) { + pmap_clear_bits(l3, PTE_D); + pmap_invalidate_page(pmap, pv->pv_va); + } + PMAP_UNLOCK(pmap); + } + rw_wunlock(lock); + rw_runlock(&pvh_global_lock); } void * From 35c91b0c278e38eaaa7fb3bfe0d18ead2e65b4b6 Mon Sep 17 00:00:00 2001 From: Mark Johnston Date: Wed, 13 Feb 2019 17:50:01 +0000 Subject: [PATCH 70/93] Implement per-CPU pmap activation tracking for RISC-V. This reduces the overhead of TLB invalidations by ensuring that we only interrupt CPUs which are using the given pmap. Tracking is performed in pmap_activate(), which gets called during context switches: from cpu_throw(), if a thread is exiting or an AP is starting, or cpu_switch() for a regular context switch. For now, pmap_sync_icache() still must interrupt all CPUs. 
Reviewed by: kib (earlier version), jhb Sponsored by: The FreeBSD Foundation Differential Revision: https://reviews.freebsd.org/D18874 --- sys/riscv/include/pcb.h | 1 - sys/riscv/include/pcpu.h | 1 + sys/riscv/include/pmap.h | 7 +++ sys/riscv/riscv/genassym.c | 1 - sys/riscv/riscv/machdep.c | 4 -- sys/riscv/riscv/mp_machdep.c | 4 ++ sys/riscv/riscv/pmap.c | 88 ++++++++++++++++++++++++++---------- sys/riscv/riscv/swtch.S | 72 +++++++++++------------------ sys/riscv/riscv/vm_machdep.c | 3 -- 9 files changed, 104 insertions(+), 77 deletions(-) diff --git a/sys/riscv/include/pcb.h b/sys/riscv/include/pcb.h index 27737a4bdf2..6cc85198a41 100644 --- a/sys/riscv/include/pcb.h +++ b/sys/riscv/include/pcb.h @@ -55,7 +55,6 @@ struct pcb { #define PCB_FP_STARTED 0x1 #define PCB_FP_USERMASK 0x1 uint64_t pcb_sepc; /* Supervisor exception pc */ - vm_offset_t pcb_l1addr; /* L1 page tables base address */ vm_offset_t pcb_onfault; /* Copyinout fault handler */ }; diff --git a/sys/riscv/include/pcpu.h b/sys/riscv/include/pcpu.h index b91c48eef1c..d570a68138c 100644 --- a/sys/riscv/include/pcpu.h +++ b/sys/riscv/include/pcpu.h @@ -45,6 +45,7 @@ #define ALT_STACK_SIZE 128 #define PCPU_MD_FIELDS \ + struct pmap *pc_curpmap; /* Currently active pmap */ \ uint32_t pc_pending_ipis; /* IPIs pending to this CPU */ \ char __pad[61] diff --git a/sys/riscv/include/pmap.h b/sys/riscv/include/pmap.h index 92eeee26331..2d55b764e9e 100644 --- a/sys/riscv/include/pmap.h +++ b/sys/riscv/include/pmap.h @@ -41,6 +41,7 @@ #ifndef LOCORE #include +#include #include #include @@ -80,6 +81,8 @@ struct pmap { struct mtx pm_mtx; struct pmap_statistics pm_stats; /* pmap statictics */ pd_entry_t *pm_l1; + u_long pm_satp; /* value for SATP register */ + cpuset_t pm_active; /* active on cpus */ TAILQ_HEAD(,pv_chunk) pm_pvchunk; /* list of mappings in pmap */ LIST_ENTRY(pmap) pm_list; /* List of all pmaps */ struct vm_radix pm_root; @@ -137,6 +140,10 @@ extern vm_offset_t virtual_end; #define L1_MAPPABLE_P(va, 
pa, size) \ ((((va) | (pa)) & L1_OFFSET) == 0 && (size) >= L1_SIZE) +struct thread; + +void pmap_activate_boot(pmap_t); +void pmap_activate_sw(struct thread *); void pmap_bootstrap(vm_offset_t, vm_paddr_t, vm_size_t); void pmap_kenter_device(vm_offset_t, vm_size_t, vm_paddr_t); vm_paddr_t pmap_kextract(vm_offset_t va); diff --git a/sys/riscv/riscv/genassym.c b/sys/riscv/riscv/genassym.c index 8336769743b..f69a3b86470 100644 --- a/sys/riscv/riscv/genassym.c +++ b/sys/riscv/riscv/genassym.c @@ -63,7 +63,6 @@ ASSYM(TDF_ASTPENDING, TDF_ASTPENDING); ASSYM(TDF_NEEDRESCHED, TDF_NEEDRESCHED); ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault)); -ASSYM(PCB_L1ADDR, offsetof(struct pcb, pcb_l1addr)); ASSYM(PCB_SIZE, sizeof(struct pcb)); ASSYM(PCB_RA, offsetof(struct pcb, pcb_ra)); ASSYM(PCB_SP, offsetof(struct pcb, pcb_sp)); diff --git a/sys/riscv/riscv/machdep.c b/sys/riscv/riscv/machdep.c index e98c90392f6..d5159c72cb5 100644 --- a/sys/riscv/riscv/machdep.c +++ b/sys/riscv/riscv/machdep.c @@ -871,10 +871,6 @@ initriscv(struct riscv_bootparams *rvbp) init_proc0(rvbp->kern_stack); - /* set page table base register for thread0 */ - thread0.td_pcb->pcb_l1addr = \ - (rvbp->kern_l1pt - KERNBASE + rvbp->kern_phys); - msgbufinit(msgbufp, msgbufsize); mutex_init(); init_param2(physmem); diff --git a/sys/riscv/riscv/mp_machdep.c b/sys/riscv/riscv/mp_machdep.c index 0cd95fd1f79..609d32b30f9 100644 --- a/sys/riscv/riscv/mp_machdep.c +++ b/sys/riscv/riscv/mp_machdep.c @@ -58,6 +58,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -255,6 +256,9 @@ init_secondary(uint64_t cpu) /* Enable external (PLIC) interrupts */ csr_set(sie, SIE_SEIE); + /* Activate process 0's pmap. 
*/ + pmap_activate_boot(vmspace_pmap(proc0.p_vmspace)); + mtx_lock_spin(&ap_boot_mtx); atomic_add_rel_32(&smp_cpus, 1); diff --git a/sys/riscv/riscv/pmap.c b/sys/riscv/riscv/pmap.c index 18400128790..d25224f75e7 100644 --- a/sys/riscv/riscv/pmap.c +++ b/sys/riscv/riscv/pmap.c @@ -118,9 +118,10 @@ __FBSDID("$FreeBSD$"); */ #include +#include #include #include -#include +#include #include #include #include @@ -566,6 +567,8 @@ pmap_bootstrap(vm_offset_t l1pt, vm_paddr_t kernstart, vm_size_t kernlen) rw_init(&pvh_global_lock, "pmap pv global"); + CPU_FILL(&kernel_pmap->pm_active); + /* Assume the address we were loaded to is a valid physical address. */ min_pa = max_pa = kernstart; @@ -723,9 +726,6 @@ pmap_init(void) * In general, the calling thread uses a plain fence to order the * writes to the page tables before invoking an SBI callback to invoke * sfence_vma() on remote CPUs. - * - * Since the riscv pmap does not yet have a pm_active field, IPIs are - * sent to all CPUs in the system. */ static void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) @@ -733,10 +733,11 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va) cpuset_t mask; sched_pin(); - mask = all_cpus; + mask = pmap->pm_active; CPU_CLR(PCPU_GET(cpuid), &mask); fence(); - sbi_remote_sfence_vma(mask.__bits, va, 1); + if (!CPU_EMPTY(&mask) && smp_started) + sbi_remote_sfence_vma(mask.__bits, va, 1); sfence_vma_page(va); sched_unpin(); } @@ -747,10 +748,11 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) cpuset_t mask; sched_pin(); - mask = all_cpus; + mask = pmap->pm_active; CPU_CLR(PCPU_GET(cpuid), &mask); fence(); - sbi_remote_sfence_vma(mask.__bits, sva, eva - sva + 1); + if (!CPU_EMPTY(&mask) && smp_started) + sbi_remote_sfence_vma(mask.__bits, sva, eva - sva + 1); /* * Might consider a loop of sfence_vma_page() for a small @@ -766,16 +768,17 @@ pmap_invalidate_all(pmap_t pmap) cpuset_t mask; sched_pin(); - mask = all_cpus; + mask = pmap->pm_active; CPU_CLR(PCPU_GET(cpuid), 
&mask); - fence(); /* * XXX: The SBI doc doesn't detail how to specify x0 as the * address to perform a global fence. BBL currently treats * all sfence_vma requests as global however. */ - sbi_remote_sfence_vma(mask.__bits, 0, 0); + fence(); + if (!CPU_EMPTY(&mask) && smp_started) + sbi_remote_sfence_vma(mask.__bits, 0, 0); sfence_vma(); sched_unpin(); } @@ -1199,6 +1202,9 @@ pmap_pinit0(pmap_t pmap) PMAP_LOCK_INIT(pmap); bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); pmap->pm_l1 = kernel_pmap->pm_l1; + pmap->pm_satp = SATP_MODE_SV39 | (vtophys(pmap->pm_l1) >> PAGE_SHIFT); + CPU_ZERO(&pmap->pm_active); + pmap_activate_boot(pmap); } int @@ -1216,12 +1222,15 @@ pmap_pinit(pmap_t pmap) l1phys = VM_PAGE_TO_PHYS(l1pt); pmap->pm_l1 = (pd_entry_t *)PHYS_TO_DMAP(l1phys); + pmap->pm_satp = SATP_MODE_SV39 | (l1phys >> PAGE_SHIFT); if ((l1pt->flags & PG_ZERO) == 0) pagezero(pmap->pm_l1); bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); + CPU_ZERO(&pmap->pm_active); + /* Install kernel pagetables */ memcpy(pmap->pm_l1, kernel_pmap->pm_l1, PAGE_SIZE); @@ -1411,6 +1420,8 @@ pmap_release(pmap_t pmap) KASSERT(pmap->pm_stats.resident_count == 0, ("pmap_release: pmap resident count %ld != 0", pmap->pm_stats.resident_count)); + KASSERT(CPU_EMPTY(&pmap->pm_active), + ("releasing active pmap %p", pmap)); mtx_lock(&allpmaps_lock); LIST_REMOVE(pmap, pm_list); @@ -4251,26 +4262,56 @@ done: return (val); } +void +pmap_activate_sw(struct thread *td) +{ + pmap_t oldpmap, pmap; + u_int cpu; + + oldpmap = PCPU_GET(curpmap); + pmap = vmspace_pmap(td->td_proc->p_vmspace); + if (pmap == oldpmap) + return; + load_satp(pmap->pm_satp); + + cpu = PCPU_GET(cpuid); +#ifdef SMP + CPU_SET_ATOMIC(cpu, &pmap->pm_active); + CPU_CLR_ATOMIC(cpu, &oldpmap->pm_active); +#else + CPU_SET(cpu, &pmap->pm_active); + CPU_CLR(cpu, &oldpmap->pm_active); +#endif + PCPU_SET(curpmap, pmap); + + sfence_vma(); +} + void pmap_activate(struct thread *td) { - pmap_t pmap; - uint64_t reg; critical_enter(); - pmap = 
vmspace_pmap(td->td_proc->p_vmspace); - td->td_pcb->pcb_l1addr = vtophys(pmap->pm_l1); - - reg = SATP_MODE_SV39; - reg |= (td->td_pcb->pcb_l1addr >> PAGE_SHIFT); - load_satp(reg); - - pmap_invalidate_all(pmap); + pmap_activate_sw(td); critical_exit(); } void -pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz) +pmap_activate_boot(pmap_t pmap) +{ + u_int cpu; + + cpu = PCPU_GET(cpuid); +#ifdef SMP + CPU_SET_ATOMIC(cpu, &pmap->pm_active); +#else + CPU_SET(cpu, &pmap->pm_active); +#endif + PCPU_SET(curpmap, pmap); +} + +void +pmap_sync_icache(pmap_t pmap, vm_offset_t va, vm_size_t sz) { cpuset_t mask; @@ -4286,7 +4327,8 @@ pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz) mask = all_cpus; CPU_CLR(PCPU_GET(cpuid), &mask); fence(); - sbi_remote_fence_i(mask.__bits); + if (!CPU_EMPTY(&mask) && smp_started) + sbi_remote_fence_i(mask.__bits); sched_unpin(); } diff --git a/sys/riscv/riscv/swtch.S b/sys/riscv/riscv/swtch.S index c80e9877a9c..b57c0487610 100644 --- a/sys/riscv/riscv/swtch.S +++ b/sys/riscv/riscv/swtch.S @@ -207,28 +207,21 @@ ENTRY(fpe_state_clear) END(fpe_state_clear) /* - * void cpu_throw(struct thread *old, struct thread *new) + * void cpu_throw(struct thread *old __unused, struct thread *new) */ ENTRY(cpu_throw) + /* Activate the new thread's pmap. */ + mv s0, a1 + mv a0, a1 + call _C_LABEL(pmap_activate_sw) + mv a0, s0 + /* Store the new curthread */ - sd a1, PC_CURTHREAD(gp) + sd a0, PC_CURTHREAD(gp) /* And the new pcb */ - ld x13, TD_PCB(a1) + ld x13, TD_PCB(a0) sd x13, PC_CURPCB(gp) - sfence.vma - - /* Switch to the new pmap */ - ld t0, PCB_L1ADDR(x13) - srli t0, t0, PAGE_SHIFT - li t1, SATP_MODE_SV39 - or t0, t0, t1 - csrw satp, t0 - - /* TODO: Invalidate the TLB */ - - sfence.vma - /* Load registers */ ld ra, (PCB_RA)(x13) ld sp, (PCB_SP)(x13) @@ -250,7 +243,7 @@ ENTRY(cpu_throw) #ifdef FPE /* Is FPE enabled for new thread? 
*/ - ld t0, TD_FRAME(a1) + ld t0, TD_FRAME(a0) ld t1, (TF_SSTATUS)(t0) li t2, SSTATUS_FS_MASK and t3, t1, t2 @@ -324,39 +317,28 @@ ENTRY(cpu_switch) 1: #endif + /* Activate the new thread's pmap */ + mv s0, a0 + mv s1, a1 + mv s2, a2 + mv a0, a1 + call _C_LABEL(pmap_activate_sw) + mv a1, s1 + + /* Release the old thread */ + sd s2, TD_LOCK(s0) +#if defined(SCHED_ULE) && defined(SMP) + /* Spin if TD_LOCK points to a blocked_lock */ + la s2, _C_LABEL(blocked_lock) +1: + ld t0, TD_LOCK(a1) + beq t0, s2, 1b +#endif /* * Restore the saved context. */ ld x13, TD_PCB(a1) - /* - * TODO: We may need to flush the cache here if switching - * to a user process. - */ - - sfence.vma - - /* Switch to the new pmap */ - ld t0, PCB_L1ADDR(x13) - srli t0, t0, PAGE_SHIFT - li t1, SATP_MODE_SV39 - or t0, t0, t1 - csrw satp, t0 - - /* TODO: Invalidate the TLB */ - - sfence.vma - - /* Release the old thread */ - sd a2, TD_LOCK(a0) -#if defined(SCHED_ULE) && defined(SMP) - /* Spin if TD_LOCK points to a blocked_lock */ - la a2, _C_LABEL(blocked_lock) -1: - ld t0, TD_LOCK(a1) - beq t0, a2, 1b -#endif - /* Restore the registers */ ld tp, (PCB_TP)(x13) ld ra, (PCB_RA)(x13) diff --git a/sys/riscv/riscv/vm_machdep.c b/sys/riscv/riscv/vm_machdep.c index 14bb0ee3430..c1801f01f2f 100644 --- a/sys/riscv/riscv/vm_machdep.c +++ b/sys/riscv/riscv/vm_machdep.c @@ -92,9 +92,6 @@ cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags) td2->td_pcb = pcb2; bcopy(td1->td_pcb, pcb2, sizeof(*pcb2)); - td2->td_pcb->pcb_l1addr = - vtophys(vmspace_pmap(td2->td_proc->p_vmspace)->pm_l1); - tf = (struct trapframe *)STACKALIGN((struct trapframe *)pcb2 - 1); bcopy(td1->td_frame, tf, sizeof(*tf)); From 9031358d6797dc94dc81dab74f11e084aae9a377 Mon Sep 17 00:00:00 2001 From: Leandro Lupori Date: Wed, 13 Feb 2019 18:28:53 +0000 Subject: [PATCH 71/93] silence cast-align warnings from clang on powerpc64 silence the following warning when compiling libthr with clang 8 for powerpc64 architecture: 
usr/src/lib/libthr/arch/powerpc/include/pthread_md.h:82:10: error: cast from 'uint8_t *' (aka 'unsigned char *') to 'struct tcb *' increases required alignment from 1 to 8 [-Werror,-Wcast-align] 82: return ((struct tcb *)(_tp - TP_OFFSET)); Submitted by: alfredo.junior_eldorado.org.br Reviewed by: git_bdragon.rtk0.net, emaste, kib, jhibbits, luporl Differential Revision: https://reviews.freebsd.org/D18807 --- lib/libthr/arch/powerpc/include/pthread_md.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/lib/libthr/arch/powerpc/include/pthread_md.h b/lib/libthr/arch/powerpc/include/pthread_md.h index a9923d7337d..939d7d2670e 100644 --- a/lib/libthr/arch/powerpc/include/pthread_md.h +++ b/lib/libthr/arch/powerpc/include/pthread_md.h @@ -72,14 +72,15 @@ _tcb_set(struct tcb *tcb) static __inline struct tcb * _tcb_get(void) { - register uint8_t *_tp; + register struct tcb *tcb; + #ifdef __powerpc64__ - __asm __volatile("mr %0,13" : "=r"(_tp)); + __asm __volatile("addi %0,13,%1" : "=r"(tcb) : "i"(-TP_OFFSET)); #else - __asm __volatile("mr %0,2" : "=r"(_tp)); + __asm __volatile("addi %0,2,%1" : "=r"(tcb) : "i"(-TP_OFFSET)); #endif - return ((struct tcb *)(_tp - TP_OFFSET)); + return (tcb); } static __inline struct pthread * From 381ab04f4f5d40a9354267c5169ac1c0bf798ebd Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Wed, 13 Feb 2019 20:13:40 +0000 Subject: [PATCH 72/93] Pull in r353907 from upstream llvm trunk (by Reid Kleckner): [MC] Make symbol version errors non-fatal We still don't have a source location, which is pretty lame, but at least we won't tell the user to file a clang bug report anymore. Fixes PR40712 This will make errors for symbols with @@ versions that are not defined non-fatal. For example: void f(void) { __asm__(".symver foo,bar@@baz"); } will now result in: error: versioned symbol bar@@baz must be defined instead of clang crashing with a diagnostic report.
PR: 234671 Upstream PR: https://bugs.llvm.org/show_bug.cgi?id=40712 MFC after: 3 days --- contrib/llvm/lib/MC/ELFObjectWriter.cpp | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/contrib/llvm/lib/MC/ELFObjectWriter.cpp b/contrib/llvm/lib/MC/ELFObjectWriter.cpp index db531f75c87..b23333e4756 100644 --- a/contrib/llvm/lib/MC/ELFObjectWriter.cpp +++ b/contrib/llvm/lib/MC/ELFObjectWriter.cpp @@ -1258,14 +1258,20 @@ void ELFObjectWriter::executePostLayoutBinding(MCAssembler &Asm, if (!Symbol.isUndefined() && !Rest.startswith("@@@")) continue; - // FIXME: produce a better error message. + // FIXME: Get source locations for these errors or diagnose them earlier. if (Symbol.isUndefined() && Rest.startswith("@@") && - !Rest.startswith("@@@")) - report_fatal_error("A @@ version cannot be undefined"); + !Rest.startswith("@@@")) { + Asm.getContext().reportError(SMLoc(), "versioned symbol " + AliasName + + " must be defined"); + continue; + } - if (Renames.count(&Symbol) && Renames[&Symbol] != Alias) - report_fatal_error(llvm::Twine("Multiple symbol versions defined for ") + - Symbol.getName()); + if (Renames.count(&Symbol) && Renames[&Symbol] != Alias) { + Asm.getContext().reportError( + SMLoc(), llvm::Twine("multiple symbol versions defined for ") + + Symbol.getName()); + continue; + } Renames.insert(std::make_pair(&Symbol, Alias)); } From 5864456d1ae16e8352d3e455a52301733ab4e3da Mon Sep 17 00:00:00 2001 From: Andriy Voskoboinyk Date: Thu, 14 Feb 2019 09:21:19 +0000 Subject: [PATCH 73/93] Add UPDATING entry for IEEE80211_AMPDU_AGE and AH_SUPPORT_AR5416 options removal Notified by: ian --- UPDATING | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/UPDATING b/UPDATING index c4338e1efda..c0f96e97b58 100644 --- a/UPDATING +++ b/UPDATING @@ -38,6 +38,12 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 13.x IS SLOW: modules on kernels not having 'device iflib', the iflib.ko module is loaded automatically. 
+20190125: + The IEEE80211_AMPDU_AGE and AH_SUPPORT_AR5416 kernel configuration + options no longer exist since r343219 and r343427 respectively; + nothing uses them, so they should be just removed from custom + kernel config files. + 20181230: r342635 changes the way efibootmgr(8) works by requiring users to add the -b (bootnum) parameter for commands where the bootnum was previously From 642bb66b63cf5d199e99df898a26238d7aed86da Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Thu, 14 Feb 2019 13:53:11 +0000 Subject: [PATCH 74/93] Provide userspace versions of do_cpuid() and cpuid_count() on i386. Some older compilers, when generating PIC code, cannot handle inline asm that clobbers %ebx (because %ebx is used as the GOT offset register). Userspace versions avoid clobbering %ebx by saving it to stack before executing the CPUID instruction. Sponsored by: The FreeBSD Foundation MFC after: 1 week --- sys/i386/include/cpufunc.h | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/sys/i386/include/cpufunc.h b/sys/i386/include/cpufunc.h index da9a3e3a086..0e935e8c7d7 100644 --- a/sys/i386/include/cpufunc.h +++ b/sys/i386/include/cpufunc.h @@ -108,21 +108,47 @@ disable_intr(void) __asm __volatile("cli" : : : "memory"); } +#ifdef _KERNEL static __inline void do_cpuid(u_int ax, u_int *p) { __asm __volatile("cpuid" - : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3]) - : "0" (ax)); + : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3]) + : "0" (ax)); } static __inline void cpuid_count(u_int ax, u_int cx, u_int *p) { __asm __volatile("cpuid" - : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3]) - : "0" (ax), "c" (cx)); + : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3]) + : "0" (ax), "c" (cx)); } +#else +static __inline void +do_cpuid(u_int ax, u_int *p) +{ + __asm __volatile( + "pushl\t%%ebx\n\t" + "cpuid\n\t" + "movl\t%%ebx,%1\n\t" + "popl\t%%ebx" + : "=a" (p[0]), "=DS" (p[1]), "=c" (p[2]), "=d" (p[3]) + : "0" 
(ax)); +} + +static __inline void +cpuid_count(u_int ax, u_int cx, u_int *p) +{ + __asm __volatile( + "pushl\t%%ebx\n\t" + "cpuid\n\t" + "movl\t%%ebx,%1\n\t" + "popl\t%%ebx" + : "=a" (p[0]), "=DS" (p[1]), "=c" (p[2]), "=d" (p[3]) + : "0" (ax), "c" (cx)); +} +#endif static __inline void enable_intr(void) From b5d72efb7a3edaf7a1bfd0f36c75151aa412e4c2 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Thu, 14 Feb 2019 13:59:00 +0000 Subject: [PATCH 75/93] x86 __vdso_gettc(): use machine/cpufunc.h function for CPUID. Based on the discussion with: jkim Sponsored by: The FreeBSD Foundation MFC after: 1 week --- lib/libc/x86/sys/__vdso_gettc.c | 29 ++--------------------------- 1 file changed, 2 insertions(+), 27 deletions(-) diff --git a/lib/libc/x86/sys/__vdso_gettc.c b/lib/libc/x86/sys/__vdso_gettc.c index 3a56b4af7bd..0781e97e2ce 100644 --- a/lib/libc/x86/sys/__vdso_gettc.c +++ b/lib/libc/x86/sys/__vdso_gettc.c @@ -53,31 +53,6 @@ __FBSDID("$FreeBSD$"); #include #include "libc_private.h" -static void -cpuidp(u_int leaf, u_int p[4]) -{ - - __asm __volatile( -#if defined(__i386__) - " pushl %%ebx\n" -#endif - " cpuid\n" -#if defined(__i386__) - " movl %%ebx,%1\n" - " popl %%ebx" -#endif - : "=a" (p[0]), -#if defined(__i386__) - "=r" (p[1]), -#elif defined(__amd64__) - "=b" (p[1]), -#else -#error "Arch" -#endif - "=c" (p[2]), "=d" (p[3]) - : "0" (leaf)); -} - static void rdtsc_mb_lfence(void) { @@ -100,12 +75,12 @@ rdtsc_mb_none(void) DEFINE_UIFUNC(static, void, rdtsc_mb, (void), static) { u_int p[4]; - /* Not a typo, string matches our cpuidp() registers use. */ + /* Not a typo, string matches our do_cpuid() registers use. */ static const char intel_id[] = "GenuntelineI"; if ((cpu_feature & CPUID_SSE2) == 0) return (rdtsc_mb_none); - cpuidp(0, p); + do_cpuid(0, p); return (memcmp(p + 1, intel_id, sizeof(intel_id) - 1) == 0 ? 
rdtsc_mb_lfence : rdtsc_mb_mfence); } From 071bca67ee8eeca078945099ba4637cefa2ed949 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Thu, 14 Feb 2019 14:02:33 +0000 Subject: [PATCH 76/93] Unify i386 and amd64 getcontextx.c, and use ifuncs while there. In particular, use ifuncs for __getcontextx_size(), also calculate the size of the extended save area in resolver. Same for __fillcontextx2(). Sponsored by: The FreeBSD Foundation MFC after: 1 week --- lib/libc/Makefile | 1 + lib/libc/amd64/gen/Makefile.inc | 2 +- lib/libc/i386/gen/Makefile.inc | 2 +- lib/libc/i386/gen/getcontextx.c | 145 ---------------------- lib/libc/x86/gen/Makefile.inc | 6 + lib/libc/{amd64 => x86}/gen/getcontextx.c | 85 ++++++++----- 6 files changed, 65 insertions(+), 176 deletions(-) delete mode 100644 lib/libc/i386/gen/getcontextx.c create mode 100644 lib/libc/x86/gen/Makefile.inc rename lib/libc/{amd64 => x86}/gen/getcontextx.c (62%) diff --git a/lib/libc/Makefile b/lib/libc/Makefile index 4a456f0e495..b545a2e81e9 100644 --- a/lib/libc/Makefile +++ b/lib/libc/Makefile @@ -122,6 +122,7 @@ NOASM= .endif .if ${LIBC_ARCH} == "i386" || ${LIBC_ARCH} == "amd64" .include "${LIBC_SRCTOP}/x86/sys/Makefile.inc" +.include "${LIBC_SRCTOP}/x86/gen/Makefile.inc" .endif .if ${MK_NIS} != "no" CFLAGS+= -DYP diff --git a/lib/libc/amd64/gen/Makefile.inc b/lib/libc/amd64/gen/Makefile.inc index fb4f7f4d30f..30fb05f89cb 100644 --- a/lib/libc/amd64/gen/Makefile.inc +++ b/lib/libc/amd64/gen/Makefile.inc @@ -2,7 +2,7 @@ # $FreeBSD$ SRCS+= _setjmp.S _set_tp.c rfork_thread.S setjmp.S sigsetjmp.S \ - fabs.S getcontextx.c \ + fabs.S \ infinity.c ldexp.c makecontext.c signalcontext.c \ flt_rounds.c fpgetmask.c fpsetmask.c fpgetprec.c fpsetprec.c \ fpgetround.c fpsetround.c fpgetsticky.c diff --git a/lib/libc/i386/gen/Makefile.inc b/lib/libc/i386/gen/Makefile.inc index 73dcabbece5..45e69cad1d0 100644 --- a/lib/libc/i386/gen/Makefile.inc +++ b/lib/libc/i386/gen/Makefile.inc @@ -2,5 +2,5 @@ # $FreeBSD$ SRCS+= 
_ctx_start.S _setjmp.S _set_tp.c fabs.S \ - flt_rounds.c getcontextx.c infinity.c ldexp.c makecontext.c \ + flt_rounds.c infinity.c ldexp.c makecontext.c \ rfork_thread.S setjmp.S signalcontext.c sigsetjmp.S diff --git a/lib/libc/i386/gen/getcontextx.c b/lib/libc/i386/gen/getcontextx.c deleted file mode 100644 index aedf3f021a7..00000000000 --- a/lib/libc/i386/gen/getcontextx.c +++ /dev/null @@ -1,145 +0,0 @@ -/*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD - * - * Copyright (c) 2011 Konstantin Belousov - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include -__FBSDID("$FreeBSD$"); - -#include -#include -#include -#include -#include -#include -#include -#include - -static int xstate_sz = -1; - -int -__getcontextx_size(void) -{ - u_int p[4]; - int cpuid_supported; - - if (xstate_sz == -1) { - __asm __volatile( - " pushfl\n" - " popl %%eax\n" - " movl %%eax,%%ecx\n" - " xorl $0x200000,%%eax\n" - " pushl %%eax\n" - " popfl\n" - " pushfl\n" - " popl %%eax\n" - " xorl %%eax,%%ecx\n" - " je 1f\n" - " movl $1,%0\n" - " jmp 2f\n" - "1: movl $0,%0\n" - "2:\n" - : "=r" (cpuid_supported) : : "eax", "ecx"); - if (cpuid_supported) { - __asm __volatile( - " pushl %%ebx\n" - " cpuid\n" - " movl %%ebx,%1\n" - " popl %%ebx\n" - : "=a" (p[0]), "=r" (p[1]), "=c" (p[2]), "=d" (p[3]) - : "0" (0x1)); - if ((p[2] & CPUID2_OSXSAVE) != 0) { - __asm __volatile( - " pushl %%ebx\n" - " cpuid\n" - " movl %%ebx,%1\n" - " popl %%ebx\n" - : "=a" (p[0]), "=r" (p[1]), "=c" (p[2]), - "=d" (p[3]) - : "0" (0xd), "2" (0x0)); - xstate_sz = p[1] - sizeof(struct savexmm); - } else - xstate_sz = 0; - } else - xstate_sz = 0; - } - - return (sizeof(ucontext_t) + xstate_sz); -} - -int -__fillcontextx2(char *ctx) -{ - struct i386_get_xfpustate xfpu; - ucontext_t *ucp; - - ucp = (ucontext_t *)ctx; - if (xstate_sz != 0) { - xfpu.addr = (char *)(ucp + 1); - xfpu.len = xstate_sz; - if (sysarch(I386_GET_XFPUSTATE, &xfpu) == -1) - return (-1); - ucp->uc_mcontext.mc_xfpustate = (__register_t)xfpu.addr; - ucp->uc_mcontext.mc_xfpustate_len = xstate_sz; - ucp->uc_mcontext.mc_flags |= _MC_HASFPXSTATE; - } else { - ucp->uc_mcontext.mc_xfpustate = 0; - ucp->uc_mcontext.mc_xfpustate_len = 0; - } - return (0); -} - -int -__fillcontextx(char *ctx) -{ - ucontext_t *ucp; - - ucp = (ucontext_t *)ctx; - if (getcontext(ucp) == -1) - return (-1); - __fillcontextx2(ctx); - return (0); -} - -__weak_reference(__getcontextx, getcontextx); - -ucontext_t * -__getcontextx(void) -{ - char *ctx; - int error; - - ctx = malloc(__getcontextx_size()); - if (ctx == NULL) - return 
(NULL); - if (__fillcontextx(ctx) == -1) { - error = errno; - free(ctx); - errno = error; - return (NULL); - } - return ((ucontext_t *)ctx); -} diff --git a/lib/libc/x86/gen/Makefile.inc b/lib/libc/x86/gen/Makefile.inc new file mode 100644 index 00000000000..0943fdb04bb --- /dev/null +++ b/lib/libc/x86/gen/Makefile.inc @@ -0,0 +1,6 @@ +# $FreeBSD$ + +.PATH: ${LIBC_SRCTOP}/x86/gen + +SRCS+= \ + getcontextx.c diff --git a/lib/libc/amd64/gen/getcontextx.c b/lib/libc/x86/gen/getcontextx.c similarity index 62% rename from lib/libc/amd64/gen/getcontextx.c rename to lib/libc/x86/gen/getcontextx.c index a109bfe738a..e9060bac44a 100644 --- a/lib/libc/amd64/gen/getcontextx.c +++ b/lib/libc/x86/gen/getcontextx.c @@ -35,51 +35,78 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include #include #include +#include +#include -static int xstate_sz = -1; +#if defined __i386__ +#define X86_GET_XFPUSTATE I386_GET_XFPUSTATE +typedef struct savexmm savex86_t ; +typedef struct i386_get_xfpustate x86_get_xfpustate_t; +#elif defined __amd64__ +#define X86_GET_XFPUSTATE AMD64_GET_XFPUSTATE +typedef struct savefpu savex86_t; +typedef struct amd64_get_xfpustate x86_get_xfpustate_t; +#else +#error "Wrong arch" +#endif -int -__getcontextx_size(void) +static int xstate_sz = 0; + +static int +__getcontextx_size_xfpu(void) { - u_int p[4]; - - if (xstate_sz == -1) { - do_cpuid(1, p); - if ((p[2] & CPUID2_OSXSAVE) != 0) { - cpuid_count(0xd, 0x0, p); - xstate_sz = p[1] - sizeof(struct savefpu); - } else - xstate_sz = 0; - } return (sizeof(ucontext_t) + xstate_sz); } -int -__fillcontextx2(char *ctx) +DEFINE_UIFUNC(, int, __getcontextx_size, (void), static) { - struct amd64_get_xfpustate xfpu; + u_int p[4]; + + if ((cpu_feature2 & CPUID2_OSXSAVE) != 0) { + cpuid_count(0xd, 0x0, p); + xstate_sz = p[1] - sizeof(savex86_t); + } + return (__getcontextx_size_xfpu); +} + +static int +__fillcontextx2_xfpu(char *ctx) +{ + x86_get_xfpustate_t xfpu; ucontext_t *ucp; ucp = (ucontext_t *)ctx; - if 
(xstate_sz != 0) { - xfpu.addr = (char *)(ucp + 1); - xfpu.len = xstate_sz; - if (sysarch(AMD64_GET_XFPUSTATE, &xfpu) == -1) - return (-1); - ucp->uc_mcontext.mc_xfpustate = (__register_t)xfpu.addr; - ucp->uc_mcontext.mc_xfpustate_len = xstate_sz; - ucp->uc_mcontext.mc_flags |= _MC_HASFPXSTATE; - } else { - ucp->uc_mcontext.mc_xfpustate = 0; - ucp->uc_mcontext.mc_xfpustate_len = 0; - } + xfpu.addr = (char *)(ucp + 1); + xfpu.len = xstate_sz; + if (sysarch(X86_GET_XFPUSTATE, &xfpu) == -1) + return (-1); + ucp->uc_mcontext.mc_xfpustate = (__register_t)xfpu.addr; + ucp->uc_mcontext.mc_xfpustate_len = xstate_sz; + ucp->uc_mcontext.mc_flags |= _MC_HASFPXSTATE; return (0); } +static int +__fillcontextx2_noxfpu(char *ctx) +{ + ucontext_t *ucp; + + ucp = (ucontext_t *)ctx; + ucp->uc_mcontext.mc_xfpustate = 0; + ucp->uc_mcontext.mc_xfpustate_len = 0; + return (0); +} + +DEFINE_UIFUNC(, int, __fillcontextx2, (char *), static) +{ + + return ((cpu_feature2 & CPUID2_OSXSAVE) != 0 ? __fillcontextx2_xfpu : + __fillcontextx2_noxfpu); +} + int __fillcontextx(char *ctx) { From 72091bb39382abba0d71dc23738684bfb4bc2574 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Thu, 14 Feb 2019 14:44:53 +0000 Subject: [PATCH 77/93] Enable enabling ASLR on non-x86 architectures. 
Discussed with: emaste Sponsored by: The FreeBSD Foundation --- sys/arm/arm/elf_machdep.c | 2 +- sys/arm64/arm64/elf_machdep.c | 3 ++- sys/mips/mips/elf_machdep.c | 4 ++-- sys/powerpc/powerpc/elf32_machdep.c | 2 +- sys/powerpc/powerpc/elf64_machdep.c | 2 +- sys/riscv/riscv/elf_machdep.c | 2 +- sys/sparc64/sparc64/elf_machdep.c | 2 +- 7 files changed, 9 insertions(+), 8 deletions(-) diff --git a/sys/arm/arm/elf_machdep.c b/sys/arm/arm/elf_machdep.c index 50e53bd9302..08fd72f8354 100644 --- a/sys/arm/arm/elf_machdep.c +++ b/sys/arm/arm/elf_machdep.c @@ -84,7 +84,7 @@ struct sysentvec elf32_freebsd_sysvec = { #if __ARM_ARCH >= 6 SV_ASLR | SV_SHP | SV_TIMEKEEP | #endif - SV_ABI_FREEBSD | SV_ILP32, + SV_ABI_FREEBSD | SV_ILP32 | SV_ASLR, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, diff --git a/sys/arm64/arm64/elf_machdep.c b/sys/arm64/arm64/elf_machdep.c index ab46995dd11..c3f24fe893e 100644 --- a/sys/arm64/arm64/elf_machdep.c +++ b/sys/arm64/arm64/elf_machdep.c @@ -79,7 +79,8 @@ static struct sysentvec elf64_freebsd_sysvec = { .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, - .sv_flags = SV_SHP | SV_TIMEKEEP | SV_ABI_FREEBSD | SV_LP64, + .sv_flags = SV_SHP | SV_TIMEKEEP | SV_ABI_FREEBSD | SV_LP64 | + SV_ASLR, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, diff --git a/sys/mips/mips/elf_machdep.c b/sys/mips/mips/elf_machdep.c index e8791f821a6..e603c8120e6 100644 --- a/sys/mips/mips/elf_machdep.c +++ b/sys/mips/mips/elf_machdep.c @@ -76,7 +76,7 @@ struct sysentvec elf64_freebsd_sysvec = { .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, - .sv_flags = SV_ABI_FREEBSD | SV_LP64, + .sv_flags = SV_ABI_FREEBSD | SV_LP64 | SV_ASLR, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, @@ -131,7 
+131,7 @@ struct sysentvec elf32_freebsd_sysvec = { .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, - .sv_flags = SV_ABI_FREEBSD | SV_ILP32, + .sv_flags = SV_ABI_FREEBSD | SV_ILP32 | SV_ASLR, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, diff --git a/sys/powerpc/powerpc/elf32_machdep.c b/sys/powerpc/powerpc/elf32_machdep.c index 18ae37318fd..ec5b565dcf1 100644 --- a/sys/powerpc/powerpc/elf32_machdep.c +++ b/sys/powerpc/powerpc/elf32_machdep.c @@ -115,7 +115,7 @@ struct sysentvec elf32_freebsd_sysvec = { .sv_fixlimit = NULL, #endif .sv_maxssiz = NULL, - .sv_flags = SV_ABI_FREEBSD | SV_ILP32 | SV_SHP, + .sv_flags = SV_ABI_FREEBSD | SV_ILP32 | SV_SHP | SV_ASLR, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_shared_page_base = FREEBSD32_SHAREDPAGE, diff --git a/sys/powerpc/powerpc/elf64_machdep.c b/sys/powerpc/powerpc/elf64_machdep.c index 7198241a2b3..9c4b3db39c0 100644 --- a/sys/powerpc/powerpc/elf64_machdep.c +++ b/sys/powerpc/powerpc/elf64_machdep.c @@ -79,7 +79,7 @@ struct sysentvec elf64_freebsd_sysvec_v1 = { .sv_setregs = exec_setregs_funcdesc, .sv_fixlimit = NULL, .sv_maxssiz = NULL, - .sv_flags = SV_ABI_FREEBSD | SV_LP64 | SV_SHP, + .sv_flags = SV_ABI_FREEBSD | SV_LP64 | SV_SHP | SV_ASLR, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, diff --git a/sys/riscv/riscv/elf_machdep.c b/sys/riscv/riscv/elf_machdep.c index d4904b1c2cd..da56ad39597 100644 --- a/sys/riscv/riscv/elf_machdep.c +++ b/sys/riscv/riscv/elf_machdep.c @@ -82,7 +82,7 @@ struct sysentvec elf64_freebsd_sysvec = { .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, - .sv_flags = SV_ABI_FREEBSD | SV_LP64 | SV_SHP, + .sv_flags = SV_ABI_FREEBSD | SV_LP64 | SV_SHP | SV_ASLR, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args 
= cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, diff --git a/sys/sparc64/sparc64/elf_machdep.c b/sys/sparc64/sparc64/elf_machdep.c index 1fbd09fa945..8f7e55bc9b3 100644 --- a/sys/sparc64/sparc64/elf_machdep.c +++ b/sys/sparc64/sparc64/elf_machdep.c @@ -80,7 +80,7 @@ static struct sysentvec elf64_freebsd_sysvec = { .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, - .sv_flags = SV_ABI_FREEBSD | SV_LP64, + .sv_flags = SV_ABI_FREEBSD | SV_LP64 | SV_ASLR, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, From 59621b207c8dddba1abe9dd3455178e5186f5a8c Mon Sep 17 00:00:00 2001 From: Leandro Lupori Date: Thu, 14 Feb 2019 15:15:32 +0000 Subject: [PATCH 78/93] [PPC64] Fix mismatch between thread flags and MSR When sigreturn() restored a thread's context, SRR1 was being restored to its previous value, but pcb_flags was not being touched. This could cause a mismatch between the thread's MSR and its pcb_flags. For instance, when the thread used the FPU for the first time inside the signal handler, sigreturn() would clear SRR1, but not pcb_flags. Then, the thread would return with the FPU bit cleared in MSR and, the next time it tried to use the FPU, it would fail on a KASSERT that checked if the FPU was disabled. This change clears the FPU bit in both pcb_flags and frame->srr1, as the code that restores the context expects to use the FPU trap to re-enable it. 
PR: 234539 Reported by: sbruno Reviewed by: jhibbits, sbruno Differential Revision: https://reviews.freebsd.org/D19166 --- sys/powerpc/powerpc/exec_machdep.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sys/powerpc/powerpc/exec_machdep.c b/sys/powerpc/powerpc/exec_machdep.c index 4cb9fb8bbae..3ae1640b264 100644 --- a/sys/powerpc/powerpc/exec_machdep.c +++ b/sys/powerpc/powerpc/exec_machdep.c @@ -474,6 +474,10 @@ set_mcontext(struct thread *td, mcontext_t *mcp) else tf->fixreg[2] = tls; + /* Disable FPU */ + tf->srr1 &= ~PSL_FP; + pcb->pcb_flags &= ~PCB_FPU; + if (mcp->mc_flags & _MC_FP_VALID) { /* enable_fpu() will happen lazily on a fault */ pcb->pcb_flags |= PCB_FPREGS; From 484e9d0322ecc53bf22bf2d1a44913a6ae37174d Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Thu, 14 Feb 2019 15:45:53 +0000 Subject: [PATCH 79/93] Make anon clustering more compatible. Make the clustering enabling knob more fine-grained by providing a setting where the allocation with hint is not clustered. This is aimed to be somewhat more compatible with e.g. go 1.4 which expects that hinted mmap without MAP_FIXED does not change the allocation address. Now the vm.cluster_anon can be set to 1 to only cluster when no hints, and to 2 to always cluster. Default value is 1. 
Requested by: peter Reviewed by: emaste, markj Sponsored by: The FreeBSD Foundation MFC after: 1 month Differential revision: https://reviews.freebsd.org/D19194 --- sys/vm/vm_map.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c index 01544c8bf00..7fa738f4015 100644 --- a/sys/vm/vm_map.c +++ b/sys/vm/vm_map.c @@ -1487,7 +1487,22 @@ static const int aslr_pages_rnd_32[2] = {0x100, 0x4}; static int cluster_anon = 1; SYSCTL_INT(_vm, OID_AUTO, cluster_anon, CTLFLAG_RW, &cluster_anon, 0, - "Cluster anonymous mappings"); + "Cluster anonymous mappings: 0 = no, 1 = yes if no hint, 2 = always"); + +static bool +clustering_anon_allowed(vm_offset_t addr) +{ + + switch (cluster_anon) { + case 0: + return (false); + case 1: + return (addr == 0); + case 2: + default: + return (true); + } +} static long aslr_restarts; SYSCTL_LONG(_vm, OID_AUTO, aslr_restarts, CTLFLAG_RD, @@ -1593,7 +1608,7 @@ vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset, } else alignment = 0; en_aslr = (map->flags & MAP_ASLR) != 0; - update_anon = cluster = cluster_anon != 0 && + update_anon = cluster = clustering_anon_allowed(*addr) && (map->flags & MAP_IS_SUB_MAP) == 0 && max_addr == 0 && find_space != VMFS_NO_SPACE && object == NULL && (cow & (MAP_INHERIT_SHARE | MAP_STACK_GROWS_UP | From deb17a3ba3a5d71d13357b9ee072932102faaa3d Mon Sep 17 00:00:00 2001 From: Warner Losh Date: Thu, 14 Feb 2019 17:04:04 +0000 Subject: [PATCH 80/93] Fix small typo. 
Differential Review: https://reviews.freebsd.org/D19193 --- UPDATING | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/UPDATING b/UPDATING index c0f96e97b58..58da2b7af0a 100644 --- a/UPDATING +++ b/UPDATING @@ -237,7 +237,7 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 13.x IS SLOW: 20180719: ARM64 now have efifb support, if you want to have serial console on your arm64 board when an screen is connected and the bootloader - setup a frambuffer for us to use, just add : + setup a framebuffer for us to use, just add : boot_serial=YES boot_multicons=YES in /boot/loader.conf From af06fa2652fb2b62ddb1647d7c036a32276fc2e5 Mon Sep 17 00:00:00 2001 From: Eric Joyner Date: Thu, 14 Feb 2019 18:02:37 +0000 Subject: [PATCH 81/93] ixl: Fix panic caused by bug exposed by r344062 Don't use a struct if_irq for IFLIB_INTR_IOV type interrupts since that results in get_core_offset() being called on them, and get_core_offset() doesn't handle IFLIB_INTR_IOV type interrupts, which results in an assert() being triggered in iflib_irq_set_affinity(). 
PR: 235730 Reported by: Jeffrey Pieper MFC after: 1 day Sponsored by: Intel Corporation --- sys/dev/ixl/if_ixl.c | 2 +- sys/dev/ixl/ixl_pf.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/sys/dev/ixl/if_ixl.c b/sys/dev/ixl/if_ixl.c index fc9ad8e11d7..ed1513f3a51 100644 --- a/sys/dev/ixl/if_ixl.c +++ b/sys/dev/ixl/if_ixl.c @@ -932,7 +932,7 @@ ixl_if_msix_intr_assign(if_ctx_t ctx, int msix) return (err); } /* Create soft IRQ for handling VFLRs */ - iflib_softirq_alloc_generic(ctx, &pf->iov_irq, IFLIB_INTR_IOV, pf, 0, "iov"); + iflib_softirq_alloc_generic(ctx, NULL, IFLIB_INTR_IOV, pf, 0, "iov"); /* Now set up the stations */ for (i = 0, vector = 1; i < vsi->shared->isc_nrxqsets; i++, vector++, rx_que++) { diff --git a/sys/dev/ixl/ixl_pf.h b/sys/dev/ixl/ixl_pf.h index 99992e1ec60..b72277233e4 100644 --- a/sys/dev/ixl/ixl_pf.h +++ b/sys/dev/ixl/ixl_pf.h @@ -138,7 +138,6 @@ struct ixl_pf { struct ixl_vf *vfs; int num_vfs; uint16_t veb_seid; - struct if_irq iov_irq; }; /* From 23e5e43ccd0fc9211757a56cd9d1b7386cfd1415 Mon Sep 17 00:00:00 2001 From: Bruce Evans Date: Thu, 14 Feb 2019 19:07:08 +0000 Subject: [PATCH 82/93] Finish the fix for overflow in calcru1(). The previous fix was unnecessarily very slow up to 105 hours where the simple formula used previously worked, and unnecessarily slow by a factor of about 5/3 up to 388 days, and didn't work above 388 days. 388 days is not a long time, since it is a reasonable uptime, and for processes the times being calculated are aggregated over all threads, so with N CPUs running the same thread a runtime of 388 days is reachable after only 388 / N physical days. The PRs document overflow at 388 days, but don't try to fix it. Use the simple formula up to 76 hours. 
Then use a complicated general method that reduces to the simple formula up to a bit less than 105 hours, then reduces to the previous method without its extra work up to almost 388 days, then does more complicated reductions, usually many bits at a time so that this is not slow. This works up to half of maximum representable time (292271 years), with accumulated rounding errors of at most 32 usec. amd64 can do all this with no avoidable rounding errors in an inline asm with 2 instructions, but this is too special to use. __uint128_t can do the same with 100's of instructions on 64-bit arches. Long doubles with at least 64 bits of precision are the easiest method to use on i386 userland, but are hard to use in the kernel. PR: 76972 and duplicates Reviewed by: kib --- sys/kern/kern_resource.c | 93 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 87 insertions(+), 6 deletions(-) diff --git a/sys/kern/kern_resource.c b/sys/kern/kern_resource.c index a4de84c311c..4264a125364 100644 --- a/sys/kern/kern_resource.c +++ b/sys/kern/kern_resource.c @@ -863,13 +863,88 @@ rufetchtd(struct thread *td, struct rusage *ru) calcru1(p, &td->td_rux, &ru->ru_utime, &ru->ru_stime); } +/* XXX: the MI version is too slow to use: */ +#ifndef __HAVE_INLINE_FLSLL +#define flsll(x) (fls((x) >> 32) != 0 ? fls((x) >> 32) + 32 : fls(x)) +#endif + static uint64_t mul64_by_fraction(uint64_t a, uint64_t b, uint64_t c) { + uint64_t acc, bh, bl; + int i, s, sa, sb; + /* - * Compute floor(a * (b / c)) without overflowing, (b / c) <= 1.0. + * Calculate (a * b) / c accurately enough without overflowing. c + * must be nonzero, and its top bit must be 0. a or b must be + * <= c, and the implementation is tuned for b <= c. + * + * The comments about times are for use in calcru1() with units of + * microseconds for 'a' and stathz ticks at 128 Hz for b and c. + * + * Let n be the number of top zero bits in c. Each iteration + * either returns, or reduces b by right shifting it by at least n. 
+ * The number of iterations is at most 1 + 64 / n, and the error is + * at most the number of iterations. + * + * It is very unusual to need even 2 iterations. Previous + * implementations overflowed essentially by returning early in the + * first iteration, with n = 38 giving overflow at 105+ hours and + * n = 32 giving overflow at 388+ days despite a more careful + * calculation. 388 days is a reasonable uptime, and the calculation + * needs to work for the uptime times the number of CPUs since 'a' + * is per-process. */ - return ((a / c) * b + (a % c) * (b / c) + (a % c) * (b % c) / c); + if (a >= (uint64_t)1 << 63) + return (0); /* Unsupported arg -- can't happen. */ + acc = 0; + for (i = 0; i < 128; i++) { + sa = flsll(a); + sb = flsll(b); + if (sa + sb <= 64) + /* Up to 105 hours on first iteration. */ + return (acc + (a * b) / c); + if (a >= c) { + /* + * This reduction is based on a = q * c + r, with the + * remainder r < c. 'a' may be large to start, and + * moving bits from b into 'a' at the end of the loop + * sets the top bit of 'a', so the reduction makes + * significant progress. + */ + acc += (a / c) * b; + a %= c; + sa = flsll(a); + if (sa + sb <= 64) + /* Up to 388 days on first iteration. */ + return (acc + (a * b) / c); + } + + /* + * This step writes a * b as a * ((bh << s) + bl) = + * a * (bh << s) + a * bl = (a << s) * bh + a * bl. The 2 + * additive terms are handled separately. Splitting in + * this way is linear except for rounding errors. + * + * s = 64 - sa is the maximum such that a << s fits in 64 + * bits. Since a < c and c has at least 1 zero top bit, + * sa < 64 and s > 0. Thus this step makes progress by + * reducing b (it increases 'a', but taking remainders on + * the next iteration completes the reduction). + * + * Finally, the choice for s is just what is needed to keep + * a * bl from overflowing, so we don't need complications + * like a recursive call mul64_by_fraction(a, bl, c) to + * handle the second additive term.
+ */ + s = 64 - sa; + bh = b >> s; + bl = b - (bh << s); + acc += (a * bl) / c; + a <<= s; + b = bh; + } + return (0); /* Algorithm failure -- can't happen. */ } static void @@ -896,15 +971,23 @@ calcru1(struct proc *p, struct rusage_ext *ruxp, struct timeval *up, tu = ruxp->rux_tu; } + /* Subdivide tu. Avoid overflow in the multiplications. */ + if (__predict_true(tu <= ((uint64_t)1 << 38) && tt <= (1 << 26))) { + /* Up to 76 hours when stathz is 128. */ + uu = (tu * ut) / tt; + su = (tu * st) / tt; + } else { + uu = mul64_by_fraction(tu, ut, tt); + su = mul64_by_fraction(tu, st, tt); + } + if (tu >= ruxp->rux_tu) { /* * The normal case, time increased. * Enforce monotonicity of bucketed numbers. */ - uu = mul64_by_fraction(tu, ut, tt); if (uu < ruxp->rux_uu) uu = ruxp->rux_uu; - su = mul64_by_fraction(tu, st, tt); if (su < ruxp->rux_su) su = ruxp->rux_su; } else if (tu + 3 > ruxp->rux_tu || 101 * tu > 100 * ruxp->rux_tu) { @@ -933,8 +1016,6 @@ calcru1(struct proc *p, struct rusage_ext *ruxp, struct timeval *up, "to %ju usec for pid %d (%s)\n", (uintmax_t)ruxp->rux_tu, (uintmax_t)tu, p->p_pid, p->p_comm); - uu = mul64_by_fraction(tu, ut, tt); - su = mul64_by_fraction(tu, st, tt); } ruxp->rux_uu = uu; From a99bc4c3eb8c281a3a0616a037481a5666732745 Mon Sep 17 00:00:00 2001 From: Sean Eric Fagan Date: Fri, 15 Feb 2019 03:46:39 +0000 Subject: [PATCH 83/93] Add CBC-MAC authentication. This adds the CBC-MAC code to the kernel, but does not hook it up to anything (that comes in the next commit). https://tools.ietf.org/html/rfc3610 describes the algorithm. Note that this is a software-only implementation, which means it is fairly slow.
Sponsored by: iXsystems Inc Differential Revision: https://reviews.freebsd.org/D18592 --- sys/conf/files | 2 + sys/modules/crypto/Makefile | 2 + sys/opencrypto/cbc_mac.c | 252 +++++++++++++++++++++++++++++++++ sys/opencrypto/cbc_mac.h | 67 +++++++++ sys/opencrypto/cryptodev.h | 7 +- sys/opencrypto/xform_cbc_mac.c | 55 +++++++ 6 files changed, 384 insertions(+), 1 deletion(-) create mode 100644 sys/opencrypto/cbc_mac.c create mode 100644 sys/opencrypto/cbc_mac.h create mode 100644 sys/opencrypto/xform_cbc_mac.c diff --git a/sys/conf/files b/sys/conf/files index 74cabfb486f..aa30c0cc64f 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -4847,6 +4847,8 @@ crypto/libsodium/randombytes.c optional crypto \ compile-with "${NORMAL_C} -I$S/contrib/libsodium/src/libsodium/include -I$S/crypto/libsodium" crypto/libsodium/utils.c optional crypto \ compile-with "${NORMAL_C} -I$S/contrib/libsodium/src/libsodium/include -I$S/crypto/libsodium" +opencrypto/cbc_mac.c optional crypto +opencrypto/xform_cbc_mac.c optional crypto rpc/auth_none.c optional krpc | nfslockd | nfscl | nfsd rpc/auth_unix.c optional krpc | nfslockd | nfscl | nfsd rpc/authunix_prot.c optional krpc | nfslockd | nfscl | nfsd diff --git a/sys/modules/crypto/Makefile b/sys/modules/crypto/Makefile index 0e66e4c3827..09c5d710e98 100644 --- a/sys/modules/crypto/Makefile +++ b/sys/modules/crypto/Makefile @@ -68,5 +68,7 @@ CFLAGS.utils.c += -I${LIBSODIUM_INC} -I${LIBSODIUM_COMPAT} SRCS += opt_param.h cryptodev_if.h bus_if.h device_if.h SRCS += opt_ddb.h +SRCS += cbc_mac.c +SRCS += xform_cbc_mac.c .include diff --git a/sys/opencrypto/cbc_mac.c b/sys/opencrypto/cbc_mac.c new file mode 100644 index 00000000000..e75e5df369b --- /dev/null +++ b/sys/opencrypto/cbc_mac.c @@ -0,0 +1,252 @@ +/* + * Copyright (c) 2018-2019 iXsystems Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include + +/* + * Given two CCM_CBC_BLOCK_LEN blocks, xor + * them into dst, and then encrypt dst.
+ */ +static void +xor_and_encrypt(struct aes_cbc_mac_ctx *ctx, + const uint8_t *src, uint8_t *dst) +{ + const uint64_t *b1; + uint64_t *b2; + uint64_t temp_block[CCM_CBC_BLOCK_LEN/sizeof(uint64_t)]; + + b1 = (const uint64_t*)src; + b2 = (uint64_t*)dst; + + for (size_t count = 0; + count < CCM_CBC_BLOCK_LEN/sizeof(uint64_t); + count++) { + temp_block[count] = b1[count] ^ b2[count]; + } + rijndaelEncrypt(ctx->keysched, ctx->rounds, (void*)temp_block, dst); +} + +void +AES_CBC_MAC_Init(struct aes_cbc_mac_ctx *ctx) +{ + bzero(ctx, sizeof(*ctx)); +} + +void +AES_CBC_MAC_Setkey(struct aes_cbc_mac_ctx *ctx, const uint8_t *key, uint16_t klen) +{ + ctx->rounds = rijndaelKeySetupEnc(ctx->keysched, key, klen * 8); +} + +/* + * This is called to set the nonce, aka IV. + * Before this call, the authDataLength and cryptDataLength fields + * MUST have been set. Sadly, there's no way to return an error. + * + * The CBC-MAC algorithm requires that the first block contain the + * nonce, as well as information about the sizes and lengths involved. + */ +void +AES_CBC_MAC_Reinit(struct aes_cbc_mac_ctx *ctx, const uint8_t *nonce, uint16_t nonceLen) +{ + uint8_t b0[CCM_CBC_BLOCK_LEN]; + uint8_t *bp = b0, flags = 0; + uint8_t L = 0; + uint64_t dataLength = ctx->cryptDataLength; + + KASSERT(ctx->authDataLength != 0 || ctx->cryptDataLength != 0, + ("Auth Data and Data lengths cannot both be 0")); + + KASSERT(nonceLen >= 7 && nonceLen <= 13, + ("nonceLen must be between 7 and 13 bytes")); + + ctx->nonce = nonce; + ctx->nonceLength = nonceLen; + + ctx->authDataCount = 0; + ctx->blockIndex = 0; + explicit_bzero(ctx->staging_block, sizeof(ctx->staging_block)); + + /* + * Need to determine the L field value. This is the number of + * bytes needed to specify the length of the message; the length + * is whatever is left in the 16 bytes after specifying flags and + * the nonce. 
+ */ + L = 15 - nonceLen; + + flags = ((ctx->authDataLength > 0) << 6) + + (((AES_CBC_MAC_HASH_LEN - 2) / 2) << 3) + + L - 1; + /* + * Now we need to set up the first block, which has flags, nonce, + * and the message length. + */ + b0[0] = flags; + bcopy(nonce, b0 + 1, nonceLen); + bp = b0 + 1 + nonceLen; + + /* Need to copy L' [aka L-1] bytes of cryptDataLength */ + for (uint8_t *dst = b0 + sizeof(b0) - 1; dst >= bp; dst--) { + *dst = dataLength; + dataLength >>= 8; + } + /* Now need to encrypt b0 */ + rijndaelEncrypt(ctx->keysched, ctx->rounds, b0, ctx->block); + /* If there is auth data, we need to set up the staging block */ + if (ctx->authDataLength) { + if (ctx->authDataLength < ((1<<16) - (1<<8))) { + uint16_t sizeVal = htobe16(ctx->authDataLength); + bcopy(&sizeVal, ctx->staging_block, sizeof(sizeVal)); + ctx->blockIndex = sizeof(sizeVal); + } else if (ctx->authDataLength < (1ULL<<32)) { + uint32_t sizeVal = htobe32(ctx->authDataLength); + ctx->staging_block[0] = 0xff; + ctx->staging_block[1] = 0xfe; + bcopy(&sizeVal, ctx->staging_block+2, sizeof(sizeVal)); + ctx->blockIndex = 2 + sizeof(sizeVal); + } else { + uint64_t sizeVal = htobe64(ctx->authDataLength); + ctx->staging_block[0] = 0xff; + ctx->staging_block[1] = 0xff; + bcopy(&sizeVal, ctx->staging_block+2, sizeof(sizeVal)); + ctx->blockIndex = 2 + sizeof(sizeVal); + } + } +} + +int +AES_CBC_MAC_Update(struct aes_cbc_mac_ctx *ctx, const uint8_t *data, + uint16_t length) +{ + size_t copy_amt; + + /* + * This will be called in one of two phases: + * (1) Applying authentication data, or + * (2) Applying the payload data. + * + * Because CBC-MAC puts the authentication data size before the + * data, subsequent calls won't be block-size-aligned. Which + * complicates things a fair bit. + * + * The payload data doesn't have that problem. + */ + + if (ctx->authDataCount < ctx->authDataLength) { + /* + * We need to process data as authentication data.
+ * Since we may be out of sync, we may also need + * to pad out the staging block. + */ + const uint8_t *ptr = data; + while (length > 0) { + + copy_amt = MIN(length, + sizeof(ctx->staging_block) - ctx->blockIndex); + + bcopy(ptr, ctx->staging_block + ctx->blockIndex, + copy_amt); + ptr += copy_amt; + length -= copy_amt; + ctx->authDataCount += copy_amt; + ctx->blockIndex += copy_amt; + ctx->blockIndex %= sizeof(ctx->staging_block); + if (ctx->authDataCount == ctx->authDataLength) + length = 0; + if (ctx->blockIndex == 0 || + ctx->authDataCount >= ctx->authDataLength) { + /* + * We're done with this block, so we + * xor staging_block with block, and then + * encrypt it. + */ + xor_and_encrypt(ctx, ctx->staging_block, ctx->block); + bzero(ctx->staging_block, sizeof(ctx->staging_block)); + ctx->blockIndex = 0; + } + } + return (0); + } + /* + * If we're here, then we're encoding payload data. + * This is marginally easier, except that _Update can + * be called with non-aligned update lengths. As a result, + * we still need to use the staging block. + */ + KASSERT((length + ctx->cryptDataCount) <= ctx->cryptDataLength, + ("More encryption data than allowed")); + + while (length) { + uint8_t *ptr; + + copy_amt = MIN(sizeof(ctx->staging_block) - ctx->blockIndex, + length); + ptr = ctx->staging_block + ctx->blockIndex; + bcopy(data, ptr, copy_amt); + data += copy_amt; + ctx->blockIndex += copy_amt; + ctx->cryptDataCount += copy_amt; + length -= copy_amt; + if (ctx->blockIndex == sizeof(ctx->staging_block)) { + /* We've got a full block */ + xor_and_encrypt(ctx, ctx->staging_block, ctx->block); + ctx->blockIndex = 0; + bzero(ctx->staging_block, sizeof(ctx->staging_block)); + } + } + return (0); +} + +void +AES_CBC_MAC_Final(uint8_t *buf, struct aes_cbc_mac_ctx *ctx) +{ + uint8_t s0[CCM_CBC_BLOCK_LEN]; + + /* + * We first need to check to see if we've got any data + * left over to encrypt. 
+ */ + if (ctx->blockIndex != 0) { + xor_and_encrypt(ctx, ctx->staging_block, ctx->block); + ctx->cryptDataCount += ctx->blockIndex; + ctx->blockIndex = 0; + explicit_bzero(ctx->staging_block, sizeof(ctx->staging_block)); + } + bzero(s0, sizeof(s0)); + s0[0] = (15 - ctx->nonceLength) - 1; + bcopy(ctx->nonce, s0 + 1, ctx->nonceLength); + rijndaelEncrypt(ctx->keysched, ctx->rounds, s0, s0); + for (size_t indx = 0; indx < AES_CBC_MAC_HASH_LEN; indx++) + buf[indx] = ctx->block[indx] ^ s0[indx]; + explicit_bzero(s0, sizeof(s0)); +} diff --git a/sys/opencrypto/cbc_mac.h b/sys/opencrypto/cbc_mac.h new file mode 100644 index 00000000000..33e61cc1a12 --- /dev/null +++ b/sys/opencrypto/cbc_mac.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2014 The FreeBSD Foundation + * Copyright (c) 2018, iXsystems Inc. + * All rights reserved. + * + * This software was developed by Sean Eric Fagan, with lots of references + * to existing AES-CCM (gmac) code. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + * + */ + +#ifndef _CBC_CCM_H +# define _CBC_CCM_H + +# include +# include + +# define CCM_CBC_BLOCK_LEN 16 /* 128 bits */ +# define CCM_CBC_MAX_DIGEST_LEN 16 +# define CCM_CBC_MIN_DIGEST_LEN 4 + +/* + * This is the authentication context structure; + * the encryption one is similar. + */ +struct aes_cbc_mac_ctx { + uint64_t authDataLength, authDataCount; + uint64_t cryptDataLength, cryptDataCount; + int blockIndex; + uint8_t staging_block[CCM_CBC_BLOCK_LEN]; + uint8_t block[CCM_CBC_BLOCK_LEN]; + const uint8_t *nonce; + int nonceLength; /* This one is in bytes, not bits! 
*/ + /* AES state data */ + int rounds; + uint32_t keysched[4*(RIJNDAEL_MAXNR+1)]; +}; + +void AES_CBC_MAC_Init(struct aes_cbc_mac_ctx *); +void AES_CBC_MAC_Setkey(struct aes_cbc_mac_ctx *, const uint8_t *, uint16_t); +void AES_CBC_MAC_Reinit(struct aes_cbc_mac_ctx *, const uint8_t *, uint16_t); +int AES_CBC_MAC_Update(struct aes_cbc_mac_ctx *, const uint8_t *, uint16_t); +void AES_CBC_MAC_Final(uint8_t *, struct aes_cbc_mac_ctx *); + +#endif /* _CBC_CCM_H */ diff --git a/sys/opencrypto/cryptodev.h b/sys/opencrypto/cryptodev.h index fe13539eca0..336271a1925 100644 --- a/sys/opencrypto/cryptodev.h +++ b/sys/opencrypto/cryptodev.h @@ -86,6 +86,7 @@ #define SHA1_KPDK_HASH_LEN 20 #define AES_GMAC_HASH_LEN 16 #define POLY1305_HASH_LEN 16 +#define AES_CBC_MAC_HASH_LEN 16 /* Maximum hash algorithm result length */ #define HASH_MAX_LEN SHA2_512_HASH_LEN /* Keep this updated */ @@ -107,6 +108,9 @@ #define AES_128_GMAC_KEY_LEN 16 #define AES_192_GMAC_KEY_LEN 24 #define AES_256_GMAC_KEY_LEN 32 +#define AES_128_CBC_MAC_KEY_LEN 16 +#define AES_192_CBC_MAC_KEY_LEN 24 +#define AES_256_CBC_MAC_KEY_LEN 32 #define POLY1305_KEY_LEN 32 @@ -199,7 +203,8 @@ #define CRYPTO_SHA2_384 36 #define CRYPTO_SHA2_512 37 #define CRYPTO_POLY1305 38 -#define CRYPTO_ALGORITHM_MAX 38 /* Keep updated - see below */ +#define CRYPTO_AES_CCM_CBC_MAC 39 /* auth side */ +#define CRYPTO_ALGORITHM_MAX 39 /* Keep updated - see below */ #define CRYPTO_ALGO_VALID(x) ((x) >= CRYPTO_ALGORITHM_MIN && \ (x) <= CRYPTO_ALGORITHM_MAX) diff --git a/sys/opencrypto/xform_cbc_mac.c b/sys/opencrypto/xform_cbc_mac.c new file mode 100644 index 00000000000..60afe950154 --- /dev/null +++ b/sys/opencrypto/xform_cbc_mac.c @@ -0,0 +1,55 @@ +#include +__FBSDID("$FreeBSD$"); + +#include +#include + +/* Authentication instances */ +struct auth_hash auth_hash_ccm_cbc_mac_128 = { + .type = CRYPTO_AES_CCM_CBC_MAC, + .name = "CBC-CCM-AES-128", + .keysize = AES_128_CBC_MAC_KEY_LEN, + .hashsize = AES_CBC_MAC_HASH_LEN, + .ctxsize = 
sizeof(struct aes_cbc_mac_ctx), + .blocksize = CCM_CBC_BLOCK_LEN, + .Init = (void (*)(void *)) AES_CBC_MAC_Init, + .Setkey = + (void (*)(void *, const u_int8_t *, u_int16_t))AES_CBC_MAC_Setkey, + .Reinit = + (void (*)(void *, const u_int8_t *, u_int16_t)) AES_CBC_MAC_Reinit, + .Update = + (int (*)(void *, const u_int8_t *, u_int16_t)) AES_CBC_MAC_Update, + .Final = (void (*)(u_int8_t *, void *)) AES_CBC_MAC_Final, +}; +struct auth_hash auth_hash_ccm_cbc_mac_192 = { + .type = CRYPTO_AES_CCM_CBC_MAC, + .name = "CBC-CCM-AES-192", + .keysize = AES_192_CBC_MAC_KEY_LEN, + .hashsize = AES_CBC_MAC_HASH_LEN, + .ctxsize = sizeof(struct aes_cbc_mac_ctx), + .blocksize = CCM_CBC_BLOCK_LEN, + .Init = (void (*)(void *)) AES_CBC_MAC_Init, + .Setkey = + (void (*)(void *, const u_int8_t *, u_int16_t)) AES_CBC_MAC_Setkey, + .Reinit = + (void (*)(void *, const u_int8_t *, u_int16_t)) AES_CBC_MAC_Reinit, + .Update = + (int (*)(void *, const u_int8_t *, u_int16_t)) AES_CBC_MAC_Update, + .Final = (void (*)(u_int8_t *, void *)) AES_CBC_MAC_Final, +}; +struct auth_hash auth_hash_ccm_cbc_mac_256 = { + .type = CRYPTO_AES_CCM_CBC_MAC, + .name = "CBC-CCM-AES-256", + .keysize = AES_256_CBC_MAC_KEY_LEN, + .hashsize = AES_CBC_MAC_HASH_LEN, + .ctxsize = sizeof(struct aes_cbc_mac_ctx), + .blocksize = CCM_CBC_BLOCK_LEN, + .Init = (void (*)(void *)) AES_CBC_MAC_Init, + .Setkey = + (void (*)(void *, const u_int8_t *, u_int16_t)) AES_CBC_MAC_Setkey, + .Reinit = + (void (*)(void *, const u_int8_t *, u_int16_t)) AES_CBC_MAC_Reinit, + .Update = + (int (*)(void *, const u_int8_t *, u_int16_t)) AES_CBC_MAC_Update, + .Final = (void (*)(u_int8_t *, void *)) AES_CBC_MAC_Final, +}; From 507281e55e3903e8ea1007ddf201f6703860189d Mon Sep 17 00:00:00 2001 From: Sean Eric Fagan Date: Fri, 15 Feb 2019 03:53:03 +0000 Subject: [PATCH 84/93] Add AES-CCM encryption, and plumb into OCF. This commit essentially has three parts: * Add the AES-CCM encryption hooks. 
This is in and of itself fairly small, as there is only a small difference between CCM and the other ICM-based algorithms. * Hook the code into the OpenCrypto framework. This is the bulk of the changes, as the algorithm type has to be checked for, and the differences between it and GCM dealt with. * Update the cryptocheck tool to be aware of it. This is invaluable for confirming that the code works. This is a software-only implementation, meaning that the performance is very low. Sponsored by: iXsystems Inc. Differential Revision: https://reviews.freebsd.org/D19090 --- sys/opencrypto/cryptodev.c | 27 +++- sys/opencrypto/cryptodev.h | 4 +- sys/opencrypto/cryptosoft.c | 86 +++++++++++- sys/opencrypto/xform_aes_icm.c | 28 ++++ sys/opencrypto/xform_auth.h | 5 + sys/opencrypto/xform_enc.h | 1 + tools/tools/crypto/cryptocheck.c | 228 ++++++++++++++++++++++++++++++- 7 files changed, 371 insertions(+), 8 deletions(-) diff --git a/sys/opencrypto/cryptodev.c b/sys/opencrypto/cryptodev.c index cfdf28cd90a..df055fbc26d 100644 --- a/sys/opencrypto/cryptodev.c +++ b/sys/opencrypto/cryptodev.c @@ -444,6 +444,9 @@ cryptof_ioctl( case CRYPTO_CHACHA20: txform = &enc_xform_chacha20; break; + case CRYPTO_AES_CCM_16: + txform = &enc_xform_ccm; + break; default: CRYPTDEB("invalid cipher"); @@ -488,6 +491,25 @@ cryptof_ioctl( thash = &auth_hash_nist_gmac_aes_256; break; + case CRYPTO_AES_CCM_CBC_MAC: + switch (sop->keylen) { + case 16: + thash = &auth_hash_ccm_cbc_mac_128; + break; + case 24: + thash = &auth_hash_ccm_cbc_mac_192; + break; + case 32: + thash = &auth_hash_ccm_cbc_mac_256; + break; + default: + CRYPTDEB("Invalid CBC MAC key size %d", + sop->keylen); + SDT_PROBE1(opencrypto, dev, ioctl, + error, __LINE__); + return (EINVAL); + } + break; #ifdef notdef case CRYPTO_MD5: thash = &auth_hash_md5; @@ -1003,12 +1025,13 @@ cryptodev_aead( } /* - * For GCM, crd_len covers only the AAD. For other ciphers + * For GCM/CCM, crd_len covers only the AAD. 
For other ciphers * chained with an HMAC, crd_len covers both the AAD and the * cipher text. */ crda->crd_skip = 0; - if (cse->cipher == CRYPTO_AES_NIST_GCM_16) + if (cse->cipher == CRYPTO_AES_NIST_GCM_16 || + cse->cipher == CRYPTO_AES_CCM_16) crda->crd_len = caead->aadlen; else crda->crd_len = caead->aadlen + caead->len; diff --git a/sys/opencrypto/cryptodev.h b/sys/opencrypto/cryptodev.h index 336271a1925..bd71e518c57 100644 --- a/sys/opencrypto/cryptodev.h +++ b/sys/opencrypto/cryptodev.h @@ -133,6 +133,7 @@ #define ARC4_IV_LEN 1 #define AES_GCM_IV_LEN 12 +#define AES_CCM_IV_LEN 12 #define AES_XTS_IV_LEN 8 #define AES_XTS_ALPHA 0x87 /* GF(2^128) generator polynomial */ @@ -204,7 +205,8 @@ #define CRYPTO_SHA2_512 37 #define CRYPTO_POLY1305 38 #define CRYPTO_AES_CCM_CBC_MAC 39 /* auth side */ -#define CRYPTO_ALGORITHM_MAX 39 /* Keep updated - see below */ +#define CRYPTO_AES_CCM_16 40 /* cipher side */ +#define CRYPTO_ALGORITHM_MAX 40 /* Keep updated - see below */ #define CRYPTO_ALGO_VALID(x) ((x) >= CRYPTO_ALGORITHM_MIN && \ (x) <= CRYPTO_ALGORITHM_MAX) diff --git a/sys/opencrypto/cryptosoft.c b/sys/opencrypto/cryptosoft.c index 431628df198..2d064ffa15e 100644 --- a/sys/opencrypto/cryptosoft.c +++ b/sys/opencrypto/cryptosoft.c @@ -62,6 +62,9 @@ __FBSDID("$FreeBSD$"); #include #include "cryptodev_if.h" +_Static_assert(AES_CCM_IV_LEN == AES_GCM_IV_LEN, + "AES_GCM_IV_LEN must currently be the same as AES_CCM_IV_LEN"); + static int32_t swcr_id; u_int8_t hmac_ipad_buffer[HMAC_MAX_BLOCK_LEN]; @@ -506,6 +509,7 @@ swcr_authenc(struct cryptop *crp) caddr_t buf = (caddr_t)crp->crp_buf; uint32_t *blkp; int aadlen, blksz, i, ivlen, len, iskip, oskip, r; + int isccm = 0; ivlen = blksz = iskip = oskip = 0; @@ -520,13 +524,18 @@ swcr_authenc(struct cryptop *crp) sw = &ses->swcr_algorithms[i]; switch (sw->sw_alg) { + case CRYPTO_AES_CCM_16: case CRYPTO_AES_NIST_GCM_16: case CRYPTO_AES_NIST_GMAC: swe = sw; crde = crd; exf = swe->sw_exf; - ivlen = 12; + /* AES_CCM_IV_LEN and 
AES_GCM_IV_LEN are both 12 */ + ivlen = AES_CCM_IV_LEN; break; + case CRYPTO_AES_CCM_CBC_MAC: + isccm = 1; + /* FALLTHROUGH */ case CRYPTO_AES_128_NIST_GMAC: case CRYPTO_AES_192_NIST_GMAC: case CRYPTO_AES_256_NIST_GMAC: @@ -544,8 +553,26 @@ swcr_authenc(struct cryptop *crp) } if (crde == NULL || crda == NULL) return (EINVAL); + /* + * We need to make sure that the auth algorithm matches the + * encr algorithm. Specifically, AES-GCM must go with + * AES NIST GMAC, and AES-CCM must go with CBC-MAC. + */ + if (crde->crd_alg == CRYPTO_AES_NIST_GCM_16) { + switch (crda->crd_alg) { + case CRYPTO_AES_128_NIST_GMAC: + case CRYPTO_AES_192_NIST_GMAC: + case CRYPTO_AES_256_NIST_GMAC: + break; /* Good! */ + default: + return (EINVAL); /* Not good! */ + } + } else if (crde->crd_alg == CRYPTO_AES_CCM_16 && + crda->crd_alg != CRYPTO_AES_CCM_CBC_MAC) + return (EINVAL); - if (crde->crd_alg == CRYPTO_AES_NIST_GCM_16 && + if ((crde->crd_alg == CRYPTO_AES_NIST_GCM_16 || + crde->crd_alg == CRYPTO_AES_CCM_16) && (crde->crd_flags & CRD_F_IV_EXPLICIT) == 0) return (EINVAL); @@ -576,6 +603,15 @@ swcr_authenc(struct cryptop *crp) } } + if (swa->sw_alg == CRYPTO_AES_CCM_CBC_MAC) { + /* + * AES CCM-CBC needs to know the length of + * both the auth data, and payload data, before + * doing the auth computation. + */ + ctx.aes_cbc_mac_ctx.authDataLength = crda->crd_len; + ctx.aes_cbc_mac_ctx.cryptDataLength = crde->crd_len; + } /* Supply MAC with IV */ if (axf->Reinit) axf->Reinit(&ctx, iv, ivlen); @@ -610,16 +646,30 @@ swcr_authenc(struct cryptop *crp) bzero(blk, blksz); crypto_copydata(crp->crp_flags, buf, crde->crd_skip + i, len, blk); + /* + * One of the problems with CCM+CBC is that the authentication + * is done on the unencrypted data. As a result, we have + * to do the authentication update at different times, + * depending on whether it's CCM or not.
+ */ if (crde->crd_flags & CRD_F_ENCRYPT) { + if (isccm) + axf->Update(&ctx, blk, len); if (exf->encrypt_multi != NULL) exf->encrypt_multi(swe->sw_kschedule, blk, len); else exf->encrypt(swe->sw_kschedule, blk); - axf->Update(&ctx, blk, len); + if (!isccm) + axf->Update(&ctx, blk, len); crypto_copyback(crp->crp_flags, buf, crde->crd_skip + i, len, blk); } else { + if (isccm) { + KASSERT(exf->encrypt_multi == NULL, + ("assume CCM is single-block only")); + exf->decrypt(swe->sw_kschedule, blk); + } axf->Update(&ctx, blk, len); } } @@ -650,6 +700,11 @@ swcr_authenc(struct cryptop *crp) r = timingsafe_bcmp(aalg, uaalg, axf->hashsize); if (r == 0) { /* tag matches, decrypt data */ + if (isccm) { + KASSERT(exf->reinit != NULL, + ("AES-CCM reinit function must be set")); + exf->reinit(swe->sw_kschedule, iv); + } for (i = 0; i < crde->crd_len; i += blksz) { len = MIN(crde->crd_len - i, blksz); if (len < blksz) @@ -799,6 +854,9 @@ swcr_newsession(device_t dev, crypto_session_t cses, struct cryptoini *cri) case CRYPTO_AES_NIST_GCM_16: txf = &enc_xform_aes_nist_gcm; goto enccommon; + case CRYPTO_AES_CCM_16: + txf = &enc_xform_ccm; + goto enccommon; case CRYPTO_AES_NIST_GMAC: txf = &enc_xform_aes_nist_gmac; swd->sw_exf = txf; @@ -943,6 +1001,22 @@ swcr_newsession(device_t dev, crypto_session_t cses, struct cryptoini *cri) swd->sw_axf = axf; break; + case CRYPTO_AES_CCM_CBC_MAC: + switch (cri->cri_klen) { + case 128: + axf = &auth_hash_ccm_cbc_mac_128; + break; + case 192: + axf = &auth_hash_ccm_cbc_mac_192; + break; + case 256: + axf = &auth_hash_ccm_cbc_mac_256; + break; + default: + swcr_freesession(dev, cses); + return EINVAL; + } + goto auth4common; case CRYPTO_AES_128_NIST_GMAC: axf = &auth_hash_nist_gmac_aes_128; goto auth4common; @@ -1042,6 +1116,7 @@ swcr_freesession(device_t dev, crypto_session_t cses) case CRYPTO_CAMELLIA_CBC: case CRYPTO_NULL_CBC: case CRYPTO_CHACHA20: + case CRYPTO_AES_CCM_16: txf = swd->sw_exf; if (swd->sw_kschedule) @@ -1056,6 +1131,7 @@ 
swcr_freesession(device_t dev, crypto_session_t cses) case CRYPTO_SHA2_512_HMAC: case CRYPTO_RIPEMD160_HMAC: case CRYPTO_NULL_HMAC: + case CRYPTO_AES_CCM_CBC_MAC: axf = swd->sw_axf; if (swd->sw_ictx) { @@ -1201,6 +1277,8 @@ swcr_process(device_t dev, struct cryptop *crp, int hint) case CRYPTO_AES_128_NIST_GMAC: case CRYPTO_AES_192_NIST_GMAC: case CRYPTO_AES_256_NIST_GMAC: + case CRYPTO_AES_CCM_16: + case CRYPTO_AES_CCM_CBC_MAC: crp->crp_etype = swcr_authenc(crp); goto done; @@ -1291,6 +1369,8 @@ swcr_attach(device_t dev) REGISTER(CRYPTO_BLAKE2B); REGISTER(CRYPTO_BLAKE2S); REGISTER(CRYPTO_CHACHA20); + REGISTER(CRYPTO_AES_CCM_16); + REGISTER(CRYPTO_AES_CCM_CBC_MAC); REGISTER(CRYPTO_POLY1305); #undef REGISTER diff --git a/sys/opencrypto/xform_aes_icm.c b/sys/opencrypto/xform_aes_icm.c index 8d3694fa23a..052be5a779a 100644 --- a/sys/opencrypto/xform_aes_icm.c +++ b/sys/opencrypto/xform_aes_icm.c @@ -57,6 +57,7 @@ static void aes_icm_crypt(caddr_t, u_int8_t *); static void aes_icm_zerokey(u_int8_t **); static void aes_icm_reinit(caddr_t, u_int8_t *); static void aes_gcm_reinit(caddr_t, u_int8_t *); +static void aes_ccm_reinit(caddr_t, u_int8_t *); /* Encryption instances */ struct enc_xform enc_xform_aes_icm = { @@ -79,6 +80,18 @@ struct enc_xform enc_xform_aes_nist_gcm = { aes_gcm_reinit, }; +struct enc_xform enc_xform_ccm = { + .type = CRYPTO_AES_CCM_16, + .name = "AES-CCM", + .blocksize = AES_ICM_BLOCK_LEN, .ivsize = AES_CCM_IV_LEN, + .minkey = AES_MIN_KEY, .maxkey = AES_MAX_KEY, + .encrypt = aes_icm_crypt, + .decrypt = aes_icm_crypt, + .setkey = aes_icm_setkey, + .zerokey = aes_icm_zerokey, + .reinit = aes_ccm_reinit, +}; + /* * Encryption wrapper routines. 
*/ @@ -104,6 +117,21 @@ aes_gcm_reinit(caddr_t key, u_int8_t *iv) ctx->ac_block[AESICM_BLOCKSIZE - 1] = 2; } +static void +aes_ccm_reinit(caddr_t key, u_int8_t *iv) +{ + struct aes_icm_ctx *ctx; + + ctx = (struct aes_icm_ctx*)key; + + /* CCM has flags, then the IV, then the counter, which starts at 1 */ + bzero(ctx->ac_block, sizeof(ctx->ac_block)); + /* 3 bytes for length field; this gives a nonce of 12 bytes */ + ctx->ac_block[0] = (15 - AES_CCM_IV_LEN) - 1; + bcopy(iv, ctx->ac_block+1, AES_CCM_IV_LEN); + ctx->ac_block[AESICM_BLOCKSIZE - 1] = 1; +} + static void aes_icm_crypt(caddr_t key, u_int8_t *data) { diff --git a/sys/opencrypto/xform_auth.h b/sys/opencrypto/xform_auth.h index 9af0f8e6d15..9b0726257d7 100644 --- a/sys/opencrypto/xform_auth.h +++ b/sys/opencrypto/xform_auth.h @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -85,6 +86,9 @@ extern struct auth_hash auth_hash_nist_gmac_aes_256; extern struct auth_hash auth_hash_blake2b; extern struct auth_hash auth_hash_blake2s; extern struct auth_hash auth_hash_poly1305; +extern struct auth_hash auth_hash_ccm_cbc_mac_128; +extern struct auth_hash auth_hash_ccm_cbc_mac_192; +extern struct auth_hash auth_hash_ccm_cbc_mac_256; union authctx { MD5_CTX md5ctx; @@ -95,6 +99,7 @@ union authctx { SHA384_CTX sha384ctx; SHA512_CTX sha512ctx; struct aes_gmac_ctx aes_gmac_ctx; + struct aes_cbc_mac_ctx aes_cbc_mac_ctx; }; #endif /* _CRYPTO_XFORM_AUTH_H_ */ diff --git a/sys/opencrypto/xform_enc.h b/sys/opencrypto/xform_enc.h index 545e0ec2549..2797ca980fb 100644 --- a/sys/opencrypto/xform_enc.h +++ b/sys/opencrypto/xform_enc.h @@ -84,6 +84,7 @@ extern struct enc_xform enc_xform_aes_xts; extern struct enc_xform enc_xform_arc4; extern struct enc_xform enc_xform_camellia; extern struct enc_xform enc_xform_chacha20; +extern struct enc_xform enc_xform_ccm; struct aes_icm_ctx { u_int32_t ac_ek[4*(RIJNDAEL_MAXNR + 1)]; diff --git a/tools/tools/crypto/cryptocheck.c b/tools/tools/crypto/cryptocheck.c index 
c8a1be69a3a..47c6bc0cfcf 100644 --- a/tools/tools/crypto/cryptocheck.c +++ b/tools/tools/crypto/cryptocheck.c @@ -105,6 +105,9 @@ * aes-gcm 128-bit aes gcm * aes-gcm192 192-bit aes gcm * aes-gcm256 256-bit aes gcm + * aes-ccm 128-bit aes ccm + * aes-ccm192 192-bit aes ccm + * aes-ccm256 256-bit aes ccm */ #include @@ -131,7 +134,7 @@ struct alg { const char *name; int cipher; int mac; - enum { T_HASH, T_HMAC, T_BLKCIPHER, T_AUTHENC, T_GCM } type; + enum { T_HASH, T_HMAC, T_BLKCIPHER, T_AUTHENC, T_GCM, T_CCM } type; const EVP_CIPHER *(*evp_cipher)(void); const EVP_MD *(*evp_md)(void); } algs[] = { @@ -186,6 +189,15 @@ struct alg { { .name = "aes-gcm256", .cipher = CRYPTO_AES_NIST_GCM_16, .mac = CRYPTO_AES_256_NIST_GMAC, .type = T_GCM, .evp_cipher = EVP_aes_256_gcm }, + { .name = "aes-ccm", .cipher = CRYPTO_AES_CCM_16, + .mac = CRYPTO_AES_CCM_CBC_MAC, .type = T_CCM, + .evp_cipher = EVP_aes_128_ccm }, + { .name = "aes-ccm192", .cipher = CRYPTO_AES_CCM_16, + .mac = CRYPTO_AES_CCM_CBC_MAC, .type = T_CCM, + .evp_cipher = EVP_aes_192_ccm }, + { .name = "aes-ccm256", .cipher = CRYPTO_AES_CCM_16, + .mac = CRYPTO_AES_CCM_CBC_MAC, .type = T_CCM, + .evp_cipher = EVP_aes_256_ccm }, }; static bool verbose; @@ -1158,6 +1170,214 @@ out: free(key); } +static void +openssl_ccm_encrypt(struct alg *alg, const EVP_CIPHER *cipher, const char *key, + const char *iv, size_t iv_len, const char *aad, size_t aad_len, + const char *input, char *output, size_t size, char *tag) +{ + EVP_CIPHER_CTX *ctx; + int outl, total; + + ctx = EVP_CIPHER_CTX_new(); + if (ctx == NULL) + errx(1, "OpenSSL %s (%zu) ctx new failed: %s", alg->name, + size, ERR_error_string(ERR_get_error(), NULL)); + if (EVP_EncryptInit_ex(ctx, cipher, NULL, NULL, NULL) != 1) + errx(1, "OpenSSL %s (%zu) ctx init failed: %s", alg->name, + size, ERR_error_string(ERR_get_error(), NULL)); + if (EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_CCM_SET_TAG, AES_CBC_MAC_HASH_LEN, NULL) != 1) + errx(1, "OpenSSL %s (%zu) setting tag length failed: %s", 
alg->name, + size, ERR_error_string(ERR_get_error(), NULL)); + if (EVP_EncryptInit_ex(ctx, NULL, NULL, (const u_char *)key, + (const u_char *)iv) != 1) + errx(1, "OpenSSL %s (%zu) ctx init failed: %s", alg->name, + size, ERR_error_string(ERR_get_error(), NULL)); + if (EVP_EncryptUpdate(ctx, NULL, &outl, NULL, size) != 1) + errx(1, "OpenSSL %s (%zu) unable to set data length: %s", alg->name, + size, ERR_error_string(ERR_get_error(), NULL)); + + if (aad != NULL) { + if (EVP_EncryptUpdate(ctx, NULL, &outl, (const u_char *)aad, + aad_len) != 1) + errx(1, "OpenSSL %s (%zu) aad update failed: %s", + alg->name, size, + ERR_error_string(ERR_get_error(), NULL)); + } + if (EVP_EncryptUpdate(ctx, (u_char *)output, &outl, + (const u_char *)input, size) != 1) + errx(1, "OpenSSL %s (%zu) encrypt update failed: %s", alg->name, + size, ERR_error_string(ERR_get_error(), NULL)); + total = outl; + if (EVP_EncryptFinal_ex(ctx, (u_char *)output + outl, &outl) != 1) + errx(1, "OpenSSL %s (%zu) encrypt final failed: %s", alg->name, + size, ERR_error_string(ERR_get_error(), NULL)); + total += outl; + if (total != size) + errx(1, "OpenSSL %s (%zu) encrypt size mismatch: %d", alg->name, + size, total); + if (EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_CCM_GET_TAG, AES_CBC_MAC_HASH_LEN, + tag) != 1) + errx(1, "OpenSSL %s (%zu) get tag failed: %s", alg->name, + size, ERR_error_string(ERR_get_error(), NULL)); + EVP_CIPHER_CTX_free(ctx); +} + +static bool +ocf_ccm(struct alg *alg, const char *key, size_t key_len, const char *iv, + size_t iv_len, const char *aad, size_t aad_len, const char *input, + char *output, size_t size, char *tag, int enc, int *cridp) +{ + struct session2_op sop; + struct crypt_aead caead; + int fd; + bool rv; + + memset(&sop, 0, sizeof(sop)); + memset(&caead, 0, sizeof(caead)); + sop.crid = crid; + sop.keylen = key_len; + sop.key = (char *)key; + sop.cipher = alg->cipher; + sop.mackeylen = key_len; + sop.mackey = (char *)key; + sop.mac = alg->mac; + fd = crget(); + if (ioctl(fd, 
CIOCGSESSION2, &sop) < 0) { + warn("cryptodev %s not supported for device %s", + alg->name, crfind(crid)); + close(fd); + return (false); + } + + caead.ses = sop.ses; + caead.op = enc ? COP_ENCRYPT : COP_DECRYPT; + caead.len = size; + caead.aadlen = aad_len; + caead.ivlen = iv_len; + caead.src = (char *)input; + caead.dst = output; + caead.aad = (char *)aad; + caead.tag = tag; + caead.iv = (char *)iv; + + if (ioctl(fd, CIOCCRYPTAEAD, &caead) < 0) { + warn("cryptodev %s (%zu) failed for device %s", + alg->name, size, crfind(crid)); + rv = false; + } else + rv = true; + + if (ioctl(fd, CIOCFSESSION, &sop.ses) < 0) + warn("ioctl(CIOCFSESSION)"); + + close(fd); + *cridp = sop.crid; + return (rv); +} + +static void +run_ccm_test(struct alg *alg, size_t size) +{ + const EVP_CIPHER *cipher; + char *aad, *buffer, *cleartext, *ciphertext; + char *iv, *key; + u_int iv_len, key_len; + int crid; + char control_tag[AES_CBC_MAC_HASH_LEN], test_tag[AES_CBC_MAC_HASH_LEN]; + + cipher = alg->evp_cipher(); + if (size % EVP_CIPHER_block_size(cipher) != 0) { + if (verbose) + printf( + "%s (%zu): invalid buffer size (block size %d)\n", + alg->name, size, EVP_CIPHER_block_size(cipher)); + return; + } + + memset(control_tag, 0x3c, sizeof(control_tag)); + memset(test_tag, 0x3c, sizeof(test_tag)); + + /* + * We only have one algorithm constant for CBC-MAC; however, the + * alg structure uses the different openssl types, which gives us + * the key length. We need that for the OCF code. + */ + key_len = EVP_CIPHER_key_length(cipher); + + /* + * AES-CCM can have varying IV lengths; however, for the moment + * we only support AES_CCM_IV_LEN (12). So if the sizes are + * different, we'll fail. 
+ */ + iv_len = EVP_CIPHER_iv_length(cipher); + if (iv_len != AES_CCM_IV_LEN) { + if (verbose) + printf("OpenSSL CCM IV length (%d) != AES_CCM_IV_LEN", + iv_len); + return; + } + + key = alloc_buffer(key_len); + iv = generate_iv(iv_len, alg); + cleartext = alloc_buffer(size); + buffer = malloc(size); + ciphertext = malloc(size); + if (aad_len != 0) + aad = alloc_buffer(aad_len); + else + aad = NULL; + + /* OpenSSL encrypt */ + openssl_ccm_encrypt(alg, cipher, key, iv, iv_len, aad, aad_len, cleartext, + ciphertext, size, control_tag); + + /* OCF encrypt */ + if (!ocf_ccm(alg, key, key_len, iv, iv_len, aad, aad_len, cleartext, + buffer, size, test_tag, 1, &crid)) + goto out; + if (memcmp(ciphertext, buffer, size) != 0) { + printf("%s (%zu) encryption mismatch:\n", alg->name, size); + printf("control:\n"); + hexdump(ciphertext, size, NULL, 0); + printf("test (cryptodev device %s):\n", crfind(crid)); + hexdump(buffer, size, NULL, 0); + goto out; + } + if (memcmp(control_tag, test_tag, sizeof(control_tag)) != 0) { + printf("%s (%zu) enc tag mismatch:\n", alg->name, size); + printf("control:\n"); + hexdump(control_tag, sizeof(control_tag), NULL, 0); + printf("test (cryptodev device %s):\n", crfind(crid)); + hexdump(test_tag, sizeof(test_tag), NULL, 0); + goto out; + } + + /* OCF decrypt */ + if (!ocf_ccm(alg, key, key_len, iv, iv_len, aad, aad_len, ciphertext, + buffer, size, control_tag, 0, &crid)) + goto out; + if (memcmp(cleartext, buffer, size) != 0) { + printf("%s (%zu) decryption mismatch:\n", alg->name, size); + printf("control:\n"); + hexdump(cleartext, size, NULL, 0); + printf("test (cryptodev device %s):\n", crfind(crid)); + hexdump(buffer, size, NULL, 0); + goto out; + } + + if (verbose) + printf("%s (%zu) matched (cryptodev device %s)\n", + alg->name, size, crfind(crid)); + +out: + free(aad); + free(ciphertext); + free(buffer); + free(cleartext); + free(iv); + free(key); +} + static void run_test(struct alg *alg, size_t size) { @@ -1178,6 +1398,9 @@ 
run_test(struct alg *alg, size_t size) case T_GCM: run_gcm_test(alg, size); break; + case T_CCM: + run_ccm_test(alg, size); + break; } } @@ -1247,7 +1470,8 @@ run_aead_tests(size_t *sizes, u_int nsizes) u_int i; for (i = 0; i < nitems(algs); i++) - if (algs[i].type == T_GCM) + if (algs[i].type == T_GCM || + algs[i].type == T_CCM) run_test_sizes(&algs[i], sizes, nsizes); } From 72309077ebc4b0fad0057ea3a4ede6d894878b76 Mon Sep 17 00:00:00 2001 From: Sean Eric Fagan Date: Fri, 15 Feb 2019 04:01:59 +0000 Subject: [PATCH 85/93] Pasting in a source control line missed the last quote. Fixed. --- sys/opencrypto/cbc_mac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/opencrypto/cbc_mac.c b/sys/opencrypto/cbc_mac.c index e75e5df369b..93f4d185458 100644 --- a/sys/opencrypto/cbc_mac.c +++ b/sys/opencrypto/cbc_mac.c @@ -23,7 +23,7 @@ */ #include -__FBSDID("$FreeBSD$); +__FBSDID("$FreeBSD$"); #include #include From 1357a3bc19a3094c6ddd0a477e70b4239fee63d9 Mon Sep 17 00:00:00 2001 From: Sean Eric Fagan Date: Fri, 15 Feb 2019 04:15:43 +0000 Subject: [PATCH 86/93] Fix another issue from r344141, having to do with size of a shift amount. This did not show up in my testing. 
Differential Revision: https://reviews.freebsd.org/D18592 --- sys/opencrypto/cbc_mac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/opencrypto/cbc_mac.c b/sys/opencrypto/cbc_mac.c index 93f4d185458..bd7f9cb00d7 100644 --- a/sys/opencrypto/cbc_mac.c +++ b/sys/opencrypto/cbc_mac.c @@ -128,7 +128,7 @@ AES_CBC_MAC_Reinit(struct aes_cbc_mac_ctx *ctx, const uint8_t *nonce, uint16_t n uint16_t sizeVal = htobe16(ctx->authDataLength); bcopy(&sizeVal, ctx->staging_block, sizeof(sizeVal)); ctx->blockIndex = sizeof(sizeVal); - } else if (ctx->authDataLength < (1UL<<32)) { + } else if (ctx->authDataLength < (1ULL<<32)) { uint32_t sizeVal = htobe32(ctx->authDataLength); ctx->staging_block[0] = 0xff; ctx->staging_block[1] = 0xfe; From e82fdca156aaeb03cdb9555d4cb6228e6209092f Mon Sep 17 00:00:00 2001 From: Michael Tuexen Date: Fri, 15 Feb 2019 09:45:17 +0000 Subject: [PATCH 87/93] Fix a byte ordering issue for the advertised receiver window in ACK segments sent in TIMEWAIT state, which I introduced in r336937. MFC after: 3 days Sponsored by: Netflix, Inc. --- sys/netinet/tcp_timewait.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/netinet/tcp_timewait.c b/sys/netinet/tcp_timewait.c index 0384fc54ca8..04729763a85 100644 --- a/sys/netinet/tcp_timewait.c +++ b/sys/netinet/tcp_timewait.c @@ -302,7 +302,7 @@ tcp_twstart(struct tcpcb *tp) if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt) && recwin < (tp->rcv_adv - tp->rcv_nxt)) recwin = (tp->rcv_adv - tp->rcv_nxt); - tw->last_win = htons((u_short)(recwin >> tp->rcv_scale)); + tw->last_win = (u_short)(recwin >> tp->rcv_scale); /* * Set t_recent if timestamps are used on the connection. From c51a229ca754eba7e83a4a5e8c18a3517263ba2f Mon Sep 17 00:00:00 2001 From: Marcin Wojtas Date: Fri, 15 Feb 2019 10:34:27 +0000 Subject: [PATCH 88/93] Fix validation of the Rx OOO completion in the ENA Requested ID should be validated when the packet is received and not when the driver is repopulating the mbufs. 
Submitted by: Michal Krawczyk Obtained from: Semihalf Sponsored by: Amazon, Inc. MFC after: 1 week --- sys/dev/ena/ena.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/sys/dev/ena/ena.c b/sys/dev/ena/ena.c index 12a9e25d307..6a82d9a15fc 100644 --- a/sys/dev/ena/ena.c +++ b/sys/dev/ena/ena.c @@ -1046,10 +1046,6 @@ ena_refill_rx_bufs(struct ena_ring *rx_ring, uint32_t num) "RX buffer - next to use: %d", next_to_use); req_id = rx_ring->free_rx_ids[next_to_use]; - rc = validate_rx_req_id(rx_ring, req_id); - if (unlikely(rc != 0)) - break; - rx_info = &rx_ring->rx_buffer_info[req_id]; rc = ena_alloc_rx_mbuf(adapter, rx_ring, rx_info); @@ -1472,6 +1468,7 @@ ena_rx_mbuf(struct ena_ring *rx_ring, struct ena_com_rx_buf_info *ena_bufs, struct ena_rx_buffer *rx_info; struct ena_adapter *adapter; unsigned int descs = ena_rx_ctx->descs; + int rc; uint16_t ntc, len, req_id, buf = 0; ntc = *next_to_clean; @@ -1485,6 +1482,10 @@ ena_rx_mbuf(struct ena_ring *rx_ring, struct ena_com_rx_buf_info *ena_bufs, len = ena_bufs[buf].len; req_id = ena_bufs[buf].req_id; + rc = validate_rx_req_id(rx_ring, req_id); + if (unlikely(rc != 0)) + return (NULL); + rx_info = &rx_ring->rx_buffer_info[req_id]; ena_trace(ENA_DBG | ENA_RXPTH, "rx_info %p, mbuf %p, paddr %jx", @@ -1517,6 +1518,16 @@ ena_rx_mbuf(struct ena_ring *rx_ring, struct ena_com_rx_buf_info *ena_bufs, ++buf; len = ena_bufs[buf].len; req_id = ena_bufs[buf].req_id; + rc = validate_rx_req_id(rx_ring, req_id); + if (unlikely(rc != 0)) { + /* + * If the req_id is invalid, then the device will be + * reset. In that case we must free all mbufs that + * were already gathered. 
+ */ + m_freem(mbuf); + return (NULL); + } rx_info = &rx_ring->rx_buffer_info[req_id]; if (unlikely(rx_info->mbuf == NULL)) { From 1d65b4c095cdb9e511857c97556403851f517118 Mon Sep 17 00:00:00 2001 From: Marcin Wojtas Date: Fri, 15 Feb 2019 10:40:41 +0000 Subject: [PATCH 89/93] Do not use ntc for obtaining buffer on Rx in the ENA In out of order mode Rx buffer are accesses by req_id. Accessing and validating mbuf using ntc is causing false error. Increase driver revision after latest RX OOO completion fixes. Submitted by: Rafal Kozik Obtained from: Semihalf Sponsored by: Amazon, Inc. MFC after: 1 week --- sys/dev/ena/ena.c | 10 ++++------ sys/dev/ena/ena.h | 2 +- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/sys/dev/ena/ena.c b/sys/dev/ena/ena.c index 6a82d9a15fc..49213d12b9d 100644 --- a/sys/dev/ena/ena.c +++ b/sys/dev/ena/ena.c @@ -1473,12 +1473,6 @@ ena_rx_mbuf(struct ena_ring *rx_ring, struct ena_com_rx_buf_info *ena_bufs, ntc = *next_to_clean; adapter = rx_ring->adapter; - rx_info = &rx_ring->rx_buffer_info[ntc]; - - if (unlikely(rx_info->mbuf == NULL)) { - device_printf(adapter->pdev, "NULL mbuf in rx_info"); - return (NULL); - } len = ena_bufs[buf].len; req_id = ena_bufs[buf].req_id; @@ -1487,6 +1481,10 @@ ena_rx_mbuf(struct ena_ring *rx_ring, struct ena_com_rx_buf_info *ena_bufs, return (NULL); rx_info = &rx_ring->rx_buffer_info[req_id]; + if (unlikely(rx_info->mbuf == NULL)) { + device_printf(adapter->pdev, "NULL mbuf in rx_info"); + return (NULL); + } ena_trace(ENA_DBG | ENA_RXPTH, "rx_info %p, mbuf %p, paddr %jx", rx_info, rx_info->mbuf, (uintmax_t)rx_info->ena_buf.paddr); diff --git a/sys/dev/ena/ena.h b/sys/dev/ena/ena.h index b0ef5e47d81..574cd57b9cd 100644 --- a/sys/dev/ena/ena.h +++ b/sys/dev/ena/ena.h @@ -41,7 +41,7 @@ #define DRV_MODULE_VER_MAJOR 0 #define DRV_MODULE_VER_MINOR 8 -#define DRV_MODULE_VER_SUBMINOR 2 +#define DRV_MODULE_VER_SUBMINOR 3 #define DRV_MODULE_NAME "ena" From 85cf19adc5de9585cc49a0a97d73c2be89b96226 Mon Sep 17 
00:00:00 2001 From: "Rodney W. Grimes" Date: Fri, 15 Feb 2019 16:20:21 +0000 Subject: [PATCH 90/93] In r340042 an attempt to quiet coverity warning cid 1305412 was overdone. nopt is the only allocated space, xopt and cp are aliases into that allocated space. Remove the 2 unneeded free's Reported by: Patrick Mooney (@pmooney_pfmooney.com) Reviewed by: jhb (maintainer), Patrick Mooney (joyent/illumos) Approved by: bde (mentor) CID: 1305412 MFC after: 3 days MFC with: 340042 Differential Revision: https://reviews.freebsd.org/D19200 --- usr.sbin/bhyve/block_if.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/usr.sbin/bhyve/block_if.c b/usr.sbin/bhyve/block_if.c index dc57217ddaa..c94548ba5fd 100644 --- a/usr.sbin/bhyve/block_if.c +++ b/usr.sbin/bhyve/block_if.c @@ -576,8 +576,6 @@ blockif_open(const char *optstr, const char *ident) err: if (fd >= 0) close(fd); - free(cp); - free(xopts); free(nopt); return (NULL); } From 11e67b92b5782066894420581b7046424f7d31e2 Mon Sep 17 00:00:00 2001 From: "Rodney W. Grimes" Date: Fri, 15 Feb 2019 16:48:15 +0000 Subject: [PATCH 91/93] In r340044 an attempt to quiet coverity warning cid 1357336 was incorrectly implemented leading to a possible double free. It is possible for both the conditional free, and the unconditional free added in r340044 to be done, fix that by initializing uopt to NULL, removing the conditional free, and only using the unconditional free at the end. 
Reported by: Patrick Mooney (patrick.mooney@joyent.com) Reviewed by: jhb (maintainer), Patrick Mooney (joyent/illumos) Approved by: bde (mentor) CID: 1357336 MFC after: 3 days MFC with: 340044 Differential Revision: https://reviews.freebsd.org/D19202 --- usr.sbin/bhyve/pci_xhci.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/usr.sbin/bhyve/pci_xhci.c b/usr.sbin/bhyve/pci_xhci.c index 3c0a798a011..de18739e957 100644 --- a/usr.sbin/bhyve/pci_xhci.c +++ b/usr.sbin/bhyve/pci_xhci.c @@ -2626,6 +2626,7 @@ pci_xhci_parse_opts(struct pci_xhci_softc *sc, char *opts) char *uopt, *xopts, *config; int usb3_port, usb2_port, i; + uopt = NULL; usb3_port = sc->usb3_port_start - 1; usb2_port = sc->usb2_port_start - 1; devices = NULL; @@ -2700,8 +2701,6 @@ pci_xhci_parse_opts(struct pci_xhci_softc *sc, char *opts) sc->ndevices++; } - if (uopt != NULL) - free(uopt); portsfinal: sc->portregs = calloc(XHCI_MAX_DEVS, sizeof(struct pci_xhci_portregs)); From e49f2c66d1289fccf4a99fa20acad4125fba8ce9 Mon Sep 17 00:00:00 2001 From: Kyle Evans Date: Fri, 15 Feb 2019 18:28:51 +0000 Subject: [PATCH 92/93] stand: dev_net: correct net_open's interpretation of params net_open previously casted the first vararg to a char * and this was half-OK: at first, it is passed to netif_open, which would cast it back to the struct devdesc * that it really is and use it properly. It is then strdup()d and used as the netdev_name, which is objectively wrong. Correct it so that the first vararg is properly casted to a struct devdesc * and the netdev_name gets set properly to make it more clear at a glance that it's not doing something horribly wrong. 
Reported by: mmel Reviewed by: imp, mmel, tsoome MFC after: 1 week Differential Revision: https://reviews.freebsd.org/D19206 --- stand/common/dev_net.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/stand/common/dev_net.c b/stand/common/dev_net.c index d461f747ada..1fa955ac1cf 100644 --- a/stand/common/dev_net.c +++ b/stand/common/dev_net.c @@ -122,13 +122,15 @@ net_open(struct open_file *f, ...) { struct iodesc *d; va_list args; - char *devname; /* Device part of file name (or NULL). */ + struct devdesc *dev; + const char *devname; /* Device part of file name (or NULL). */ int error = 0; va_start(args, f); - devname = va_arg(args, char*); + dev = va_arg(args, struct devdesc *); va_end(args); + devname = dev->d_dev->dv_name; /* Before opening another interface, close the previous one first. */ if (netdev_sock >= 0 && strcmp(devname, netdev_name) != 0) net_cleanup(); @@ -137,7 +139,7 @@ net_open(struct open_file *f, ...) if (netdev_opens == 0) { /* Find network interface. */ if (netdev_sock < 0) { - netdev_sock = netif_open(devname); + netdev_sock = netif_open(dev); if (netdev_sock < 0) { printf("net_open: netif_open() failed\n"); return (ENXIO); From ca62461bc6525f4d25d276714b4b0a2947e183a0 Mon Sep 17 00:00:00 2001 From: Stephen Hurd Date: Fri, 15 Feb 2019 18:51:43 +0000 Subject: [PATCH 93/93] iflib: Improve return values of interrupt handlers. iflib was returning FILTER_HANDLED, in cases where FILTER_STRAY was more correct. This potentially caused issues with shared legacy interrupts. Driver filters returning FILTER_STRAY are now properly handled. 
Submitted by: Augustin Cavalier Reviewed by: marius, gallatin Obtained from: Haiku (a84bb9, 4947d1) MFC after: 1 week Sponsored by: Limelight Networks Differential Revision: https://reviews.freebsd.org/D19201 --- sys/net/iflib.c | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/sys/net/iflib.c b/sys/net/iflib.c index fb0c420f1da..51d980cd102 100644 --- a/sys/net/iflib.c +++ b/sys/net/iflib.c @@ -1468,12 +1468,17 @@ iflib_fast_intr(void *arg) { iflib_filter_info_t info = arg; struct grouptask *gtask = info->ifi_task; + int result; + if (!iflib_started) - return (FILTER_HANDLED); + return (FILTER_STRAY); DBG_COUNTER_INC(fast_intrs); - if (info->ifi_filter != NULL && info->ifi_filter(info->ifi_filter_arg) == FILTER_HANDLED) - return (FILTER_HANDLED); + if (info->ifi_filter != NULL) { + result = info->ifi_filter(info->ifi_filter_arg); + if ((result & FILTER_SCHEDULE_THREAD) == 0) + return (result); + } GROUPTASK_ENQUEUE(gtask); return (FILTER_HANDLED); @@ -1488,15 +1493,18 @@ iflib_fast_intr_rxtx(void *arg) iflib_rxq_t rxq = (iflib_rxq_t)info->ifi_ctx; iflib_txq_t txq; void *sc; - int i, cidx; + int i, cidx, result; qidx_t txqid; if (!iflib_started) - return (FILTER_HANDLED); + return (FILTER_STRAY); DBG_COUNTER_INC(fast_intrs); - if (info->ifi_filter != NULL && info->ifi_filter(info->ifi_filter_arg) == FILTER_HANDLED) - return (FILTER_HANDLED); + if (info->ifi_filter != NULL) { + result = info->ifi_filter(info->ifi_filter_arg); + if ((result & FILTER_SCHEDULE_THREAD) == 0) + return (result); + } ctx = rxq->ifr_ctx; sc = ctx->ifc_softc; @@ -1531,13 +1539,17 @@ iflib_fast_intr_ctx(void *arg) { iflib_filter_info_t info = arg; struct grouptask *gtask = info->ifi_task; + int result; if (!iflib_started) - return (FILTER_HANDLED); + return (FILTER_STRAY); DBG_COUNTER_INC(fast_intrs); - if (info->ifi_filter != NULL && info->ifi_filter(info->ifi_filter_arg) == FILTER_HANDLED) - return (FILTER_HANDLED); + if (info->ifi_filter 
!= NULL) { + result = info->ifi_filter(info->ifi_filter_arg); + if ((result & FILTER_SCHEDULE_THREAD) == 0) + return (result); + } GROUPTASK_ENQUEUE(gtask); return (FILTER_HANDLED);